llama_cpp 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +18 -2
- data/ext/llama_cpp/extconf.rb +2 -1
- data/ext/llama_cpp/llama_cpp.cpp +315 -8
- data/ext/llama_cpp/src/ggml-alloc.c +541 -0
- data/ext/llama_cpp/src/ggml-alloc.h +22 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +2271 -414
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +218 -87
- data/ext/llama_cpp/src/ggml-metal.metal +72 -55
- data/ext/llama_cpp/src/ggml.c +754 -996
- data/ext/llama_cpp/src/ggml.h +94 -18
- data/ext/llama_cpp/src/k_quants.c +350 -24
- data/ext/llama_cpp/src/llama.cpp +713 -179
- data/ext/llama_cpp/src/llama.h +61 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +26 -0
- metadata +4 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -3440,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float
 
 //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
 inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
-#if defined(GGML_SIMD)
+#if defined(GGML_USE_ACCELERATE)
+    vDSP_vsmul(y, 1, &v, y, 1, n);
+#elif defined(GGML_SIMD)
     const int np = (n & ~(GGML_F32_STEP - 1));
 
     GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
@@ -3603,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
 #endif
 }
 
-inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x) {
+inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
     ggml_float sum = 0.0;
     for (int i = 0; i < n; ++i) {
         sum += (ggml_float)x[i];
@@ -3611,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
     *s = sum;
 }
 
+inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
+    float sum = 0.0f;
+    for (int i = 0; i < n; ++i) {
+        sum += GGML_FP16_TO_FP32(x[i]);
+    }
+    *s = sum;
+}
+
 inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
 #ifndef GGML_USE_ACCELERATE
     float max = -INFINITY;
@@ -3750,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "ARGMAX",
     "REPEAT",
     "REPEAT_BACK",
-    "ABS",
-    "SGN",
-    "NEG",
-    "STEP",
-    "TANH",
-    "ELU",
-    "RELU",
-    "GELU",
-    "GELU_QUICK",
-    "SILU",
     "SILU_BACK",
     "NORM",
     "RMS_NORM",
@@ -3798,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "WIN_PART",
     "WIN_UNPART",
 
+    "UNARY",
+
     "MAP_UNARY",
     "MAP_BINARY",
 
@@ -3809,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
     "CROSS_ENTROPY_LOSS_BACK",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "none",
@@ -3830,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "argmax(x)",
     "repeat(x)",
     "repeat_back(x)",
-    "abs(x)",
-    "sgn(x)",
-    "-x",
-    "step(x)",
-    "tanh(x)",
-    "elu(x)",
-    "relu(x)",
-    "gelu(x)",
-    "gelu_quick(x)",
-    "silu(x)",
     "silu_back(x)",
     "norm(x)",
     "rms_norm(x)",
@@ -3878,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "win_part(x)",
     "win_unpart(x)",
 
+    "unary(x)",
+
     "f(x)",
     "f(x,y)",
 
@@ -3889,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
     "cross_entropy_loss_back(x,y)",
 };
 
-static_assert(GGML_OP_COUNT == 68, "GGML_OP_COUNT != 68");
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
 
 static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
 
@@ -4077,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4145,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
     return GGML_OP_NAME[op];
 }
 
+const char * ggml_op_symbol(enum ggml_op op) {
+    return GGML_OP_SYMBOL[op];
+}
+
 size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return GGML_TYPE_SIZE[tensor->type];
 }
@@ -4214,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }
 
 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
 bool ggml_is_transposed(const struct ggml_tensor * tensor) {
@@ -4231,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
         tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
+static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
+    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+    return
+        tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
+        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
+        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+}
+
 bool ggml_is_permuted(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
@@ -4376,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
-    const size_t mem_size =
+    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
     *ctx = (struct ggml_context) {
         /*.mem_size           =*/ mem_size,
@@ -4443,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
     return result;
 }
 
+bool ggml_get_no_alloc(struct ggml_context * ctx) {
+    return ctx->no_alloc;
+}
+
 void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
     ctx->no_alloc = no_alloc;
 }
@@ -4461,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
     while (obj != NULL) {
-        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
 
-        const size_t size = ggml_nbytes(tensor);
+            const size_t size = ggml_nbytes(tensor);
 
-        if (max_size < size) {
-            max_size = size;
+            if (max_size < size) {
+                max_size = size;
+            }
         }
 
         obj = obj->next;
@@ -4480,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
 // this is an error prone process, but it is necessary to support inplace
 // operators when using scratch buffers
 // TODO: implement a better way
-void ggml_scratch_save(struct ggml_context * ctx) {
+static void ggml_scratch_save(struct ggml_context * ctx) {
     // this is needed to allow opt tensors to store their data
     // TODO: again, need to find a better way
     ctx->no_alloc_save = ctx->no_alloc;
@@ -4490,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
     ctx->scratch.data = NULL;
 }
 
-void ggml_scratch_load(struct ggml_context * ctx) {
+static void ggml_scratch_load(struct ggml_context * ctx) {
     ctx->no_alloc = ctx->no_alloc_save;
 
     ctx->scratch = ctx->scratch_save;
@@ -4498,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum   ggml_type type,
-        int    n_dims,
-        const int64_t* ne,
-        void*  data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
@@ -4511,77 +4519,81 @@ struct ggml_tensor * ggml_new_tensor_impl(
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end  = cur_offs + cur_size;
 
-    size_t size_needed = 0;
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    // align to GGML_MEM_ALIGN
+    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
 
     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += GGML_TENSOR_SIZE;
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
+
+    *obj_new = (struct ggml_object) {
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+        .type = type,
+    };
 
-        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
+    ggml_assert_aligned(mem_buffer + obj_new->offs);
 
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = size_needed,
-            .next = NULL,
-        };
+    if (obj_cur != NULL) {
+        obj_cur->next = obj_new;
     } else {
-        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
-            assert(false);
-            return NULL;
+        // this is the first object in this context
+        ctx->objects_begin = obj_new;
+    }
+
+    ctx->objects_end = obj_new;
+
+    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+
+    return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum   ggml_type      type,
+        int                   n_dims,
+        const int64_t       * ne,
+        void                * data) {
+
+    assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
         }
+    }
 
-        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data in the scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
             assert(false);
             return NULL;
         }
 
         data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = GGML_TENSOR_SIZE,
-            .next = NULL,
-        };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
+        ctx->scratch.offs += data_size;
 
-        ctx->scratch.offs += size_needed;
+        data_size = 0;
     }
 
-    if (obj_cur != NULL) {
-        obj_cur->next = obj_new;
-    } else {
-        // this is the first object in this context
-        ctx->objects_begin = obj_new;
-    }
-
-    ctx->objects_end = obj_new;
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
 
-    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
-
-    ggml_assert_aligned(result);
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
     *result = (struct ggml_tensor) {
         /*.type         =*/ type,
@@ -4590,6 +4602,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
         /*.ne           =*/ { 1, 1, 1, 1 },
         /*.nb           =*/ { 0, 0, 0, 0 },
         /*.op           =*/ GGML_OP_NONE,
+        /*.op_params    =*/ {0},
         /*.is_param     =*/ false,
         /*.grad         =*/ NULL,
         /*.src          =*/ { NULL },
@@ -4620,24 +4633,39 @@ struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
+}
+
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
-        enum   ggml_type type,
-        int    n_dims,
-        const int64_t* ne) {
+        enum   ggml_type      type,
+        int                   n_dims,
+        const int64_t       * ne) {
     return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }
 
 struct ggml_tensor * ggml_new_tensor_1d(
         struct ggml_context * ctx,
-        enum   ggml_type type,
+        enum   ggml_type      type,
         int64_t ne0) {
     return ggml_new_tensor(ctx, type, 1, &ne0);
 }
 
 struct ggml_tensor * ggml_new_tensor_2d(
         struct ggml_context * ctx,
-        enum   ggml_type type,
+        enum   ggml_type      type,
         int64_t ne0,
         int64_t ne1) {
     const int64_t ne[2] = { ne0, ne1 };
@@ -4646,7 +4674,7 @@ struct ggml_tensor * ggml_new_tensor_2d(
 
 struct ggml_tensor * ggml_new_tensor_3d(
         struct ggml_context * ctx,
-        enum   ggml_type type,
+        enum   ggml_type      type,
         int64_t ne0,
         int64_t ne1,
         int64_t ne2) {
@@ -4951,6 +4979,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
     return (float *)(tensor->data);
 }
 
+enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
+    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
+    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
+}
+
 const char * ggml_get_name(const struct ggml_tensor * tensor) {
     return tensor->name;
 }
@@ -4989,9 +5022,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
-        if (strcmp(cur->name, name) == 0) {
-            return cur;
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+            if (strcmp(cur->name, name) == 0) {
+                return cur;
+            }
         }
 
         obj = obj->next;
@@ -5004,7 +5039,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
 
 // ggml_dup
 
-struct ggml_tensor * ggml_dup_impl(
+static struct ggml_tensor * ggml_dup_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5019,7 +5054,6 @@ struct ggml_tensor * ggml_dup_impl(
     result->op   = GGML_OP_DUP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5038,7 +5072,7 @@ struct ggml_tensor * ggml_dup_inplace(
 
 // ggml_add
 
-struct ggml_tensor * ggml_add_impl(
+static struct ggml_tensor * ggml_add_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5081,7 +5115,7 @@ struct ggml_tensor * ggml_add_inplace(
 
 // ggml_add1
 
-struct ggml_tensor * ggml_add1_impl(
+static struct ggml_tensor * ggml_add1_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5121,7 +5155,7 @@ struct ggml_tensor * ggml_add1_inplace(
 
 // ggml_acc
 
-struct ggml_tensor * ggml_acc_impl(
+static struct ggml_tensor * ggml_acc_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5143,23 +5177,13 @@ struct ggml_tensor * ggml_acc_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    ((int32_t *) c->data)[0] = nb1;
-    ((int32_t *) c->data)[1] = nb2;
-    ((int32_t *) c->data)[2] = nb3;
-    ((int32_t *) c->data)[3] = offset;
-    ((int32_t *) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_ACC;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -5188,7 +5212,7 @@ struct ggml_tensor * ggml_acc_inplace(
 
 // ggml_sub
 
-struct ggml_tensor * ggml_sub_impl(
+static struct ggml_tensor * ggml_sub_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5227,7 +5251,7 @@ struct ggml_tensor * ggml_sub_inplace(
 
 // ggml_mul
 
-struct ggml_tensor * ggml_mul_impl(
+static struct ggml_tensor * ggml_mul_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5274,7 +5298,7 @@ struct ggml_tensor * ggml_mul_inplace(
 
 // ggml_div
 
-struct ggml_tensor * ggml_div_impl(
+static struct ggml_tensor * ggml_div_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -5317,7 +5341,7 @@ struct ggml_tensor * ggml_div_inplace(
 
 // ggml_sqr
 
-struct ggml_tensor * ggml_sqr_impl(
+static struct ggml_tensor * ggml_sqr_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5332,7 +5356,6 @@ struct ggml_tensor * ggml_sqr_impl(
     result->op   = GGML_OP_SQR;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5351,7 +5374,7 @@ struct ggml_tensor * ggml_sqr_inplace(
 
 // ggml_sqrt
 
-struct ggml_tensor * ggml_sqrt_impl(
+static struct ggml_tensor * ggml_sqrt_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5366,7 +5389,6 @@ struct ggml_tensor * ggml_sqrt_impl(
     result->op   = GGML_OP_SQRT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5386,7 +5408,7 @@ struct ggml_tensor * ggml_sqrt_inplace(
 
 // ggml_log
 
-struct ggml_tensor * ggml_log_impl(
+static struct ggml_tensor * ggml_log_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5401,7 +5423,6 @@ struct ggml_tensor * ggml_log_impl(
     result->op   = GGML_OP_LOG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5434,7 +5455,6 @@ struct ggml_tensor * ggml_sum(
     result->op   = GGML_OP_SUM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5461,7 +5481,6 @@ struct ggml_tensor * ggml_sum_rows(
     result->op   = GGML_OP_SUM_ROWS;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5484,7 +5503,6 @@ struct ggml_tensor * ggml_mean(
     result->op   = GGML_OP_MEAN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5508,7 +5526,6 @@ struct ggml_tensor * ggml_argmax(
     result->op   = GGML_OP_ARGMAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -5571,343 +5588,142 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_abs
 
-struct ggml_tensor * ggml_abs_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_ABS;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_abs(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
 }
 
 struct ggml_tensor * ggml_abs_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_abs_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
 }
 
-
 // ggml_sgn
 
-struct ggml_tensor * ggml_sgn_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_SGN;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_sgn(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 struct ggml_tensor * ggml_sgn_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_sgn_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
 }
 
 // ggml_neg
 
-struct ggml_tensor * ggml_neg_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_NEG;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_neg(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 struct ggml_tensor * ggml_neg_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_neg_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
 }
 
 // ggml_step
 
-struct ggml_tensor * ggml_step_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_STEP;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_step(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 struct ggml_tensor * ggml_step_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_step_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
 }
 
 // ggml_tanh
 
-struct ggml_tensor * ggml_tanh_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_TANH;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_tanh(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 struct ggml_tensor * ggml_tanh_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_tanh_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
 }
 
 // ggml_elu
 
-struct ggml_tensor * ggml_elu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_ELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_elu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 struct ggml_tensor * ggml_elu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_elu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
 }
 
 // ggml_relu
 
-struct ggml_tensor * ggml_relu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_RELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_relu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 struct ggml_tensor * ggml_relu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_relu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
 }
 
 // ggml_gelu
 
-struct ggml_tensor * ggml_gelu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_GELU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 struct ggml_tensor * ggml_gelu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
 }
 
 // ggml_gelu_quick
 
-struct ggml_tensor * ggml_gelu_quick_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_GELU_QUICK;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_gelu_quick(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 struct ggml_tensor * ggml_gelu_quick_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_gelu_quick_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
 }
 
 // ggml_silu
 
-struct ggml_tensor * ggml_silu_impl(
-        struct ggml_context * ctx,
-        struct ggml_tensor * a,
-        bool inplace) {
-    bool is_node = false;
-
-    if (!inplace && (a->grad)) {
-        is_node = true;
-    }
-
-    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-    result->op   = GGML_OP_SILU;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-    result->src[1] = NULL;
-
-    return result;
-}
-
 struct ggml_tensor * ggml_silu(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, false);
+    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 struct ggml_tensor * ggml_silu_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a) {
-    return ggml_silu_impl(ctx, a, true);
+    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
 }
 
 // ggml_silu_back
@@ -5935,7 +5751,7 @@ struct ggml_tensor * ggml_silu_back(
 
 // ggml_norm
 
-struct ggml_tensor * ggml_norm_impl(
+static struct ggml_tensor * ggml_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -5948,10 +5764,11 @@ struct ggml_tensor * ggml_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    // TODO: maybe store epsilon here?
+
     result->op   = GGML_OP_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
@@ -5968,9 +5785,10 @@ struct ggml_tensor * ggml_norm_inplace(
     return ggml_norm_impl(ctx, a, true);
 }
 
-struct ggml_tensor * ggml_rms_norm_impl(
+static struct ggml_tensor * ggml_rms_norm_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
+        float eps,
         bool inplace) {
     bool is_node = false;
 
@@ -5980,24 +5798,27 @@ struct ggml_tensor * ggml_rms_norm_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
+    ggml_set_op_params(result, &eps, sizeof(eps));
+
     result->op   = GGML_OP_RMS_NORM;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL; // TODO: maybe store epsilon here?
 
     return result;
 }
 
 struct ggml_tensor * ggml_rms_norm(
         struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_rms_norm_impl(ctx, a, false);
+        struct ggml_tensor * a,
+        float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, false);
 }
 
 struct ggml_tensor * ggml_rms_norm_inplace(
         struct ggml_context * ctx,
-        struct ggml_tensor * a) {
-    return ggml_rms_norm_impl(ctx, a, true);
+        struct ggml_tensor * a,
+        float eps) {
+    return ggml_rms_norm_impl(ctx, a, eps, true);
 }
 
 struct ggml_tensor * ggml_rms_norm_back(
@@ -6076,7 +5897,7 @@ struct ggml_tensor * ggml_out_prod(
 
 // ggml_scale
 
-struct ggml_tensor * ggml_scale_impl(
+static struct ggml_tensor * ggml_scale_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6116,7 +5937,7 @@ struct ggml_tensor * ggml_scale_inplace(
 
 // ggml_set
 
-struct ggml_tensor * ggml_set_impl(
+static struct ggml_tensor * ggml_set_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6136,23 +5957,13 @@ struct ggml_tensor * ggml_set_impl(
     // make a view of the destination
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-    (( int32_t * ) c->data)[0] = nb1;
-    (( int32_t * ) c->data)[1] = nb2;
-    (( int32_t * ) c->data)[2] = nb3;
-    (( int32_t * ) c->data)[3] = offset;
-    (( int32_t * ) c->data)[4] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+    ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_SET;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;
 
     return result;
 }
@@ -6216,7 +6027,7 @@ struct ggml_tensor * ggml_set_2d_inplace(
 
 // ggml_cpy
 
-struct ggml_tensor * ggml_cpy_impl(
+static struct ggml_tensor * ggml_cpy_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6261,7 +6072,7 @@ struct ggml_tensor * ggml_cpy_inplace(
 
 // ggml_cont
 
-struct ggml_tensor * ggml_cont_impl(
+static struct ggml_tensor * ggml_cont_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -6277,7 +6088,6 @@ struct ggml_tensor * ggml_cont_impl(
     result->op   = GGML_OP_CONT;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6321,7 +6131,6 @@ struct ggml_tensor * ggml_reshape(
     result->op   = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6346,7 +6155,6 @@ struct ggml_tensor * ggml_reshape_1d(
     result->op   = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6372,7 +6180,6 @@ struct ggml_tensor * ggml_reshape_2d(
     result->op   = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6399,7 +6206,6 @@ struct ggml_tensor * ggml_reshape_3d(
     result->op   = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6428,13 +6234,33 @@ struct ggml_tensor * ggml_reshape_4d(
     result->op   = GGML_OP_RESHAPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
 
 // ggml_view_1d
 
+static struct ggml_tensor * ggml_view_tensor_offset(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   n_dims,
+        const int64_t       * ne,
+        size_t                offset) {
+    // don't calculate an offset from an unallocated tensor
+    void * data = NULL;
+    if (a->data != NULL) {
+        data = (char *) a->data + offset;
+    }
+
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
+
+    ggml_format_name(result, "%s (view)", a->name);
+
+    ggml_set_op_params(result, &offset, sizeof(offset));
+
+    return result;
+}
+
 struct ggml_tensor * ggml_view_1d(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -6447,22 +6273,11 @@ struct ggml_tensor * ggml_view_1d(
         is_node = true;
     }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
 
     result->op   = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6485,16 +6300,7 @@ struct ggml_tensor * ggml_view_2d(
 
     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
@@ -6503,8 +6309,6 @@ struct ggml_tensor * ggml_view_2d(
     result->op   = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6529,16 +6333,7 @@ struct ggml_tensor * ggml_view_3d(
 
     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6547,8 +6342,6 @@ struct ggml_tensor * ggml_view_3d(
     result->op   = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6575,16 +6368,7 @@ struct ggml_tensor * ggml_view_4d(
 
     const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
-    ggml_format_name(result, "%s (view)", a->name);
-
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(offs, "offset");
-    memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-    ggml_scratch_load(ctx);
+    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
@@ -6593,8 +6377,6 @@ struct ggml_tensor * ggml_view_4d(
     result->op   = GGML_OP_VIEW;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = offs;
 
     return result;
 }
@@ -6655,22 +6437,9 @@ struct ggml_tensor * ggml_permute(
     result->op   = GGML_OP_PERMUTE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
-    if (is_node) {
-        ggml_scratch_save(ctx);
-
-        struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
-
-        ((int32_t *) b->data)[0] = axis0;
-        ((int32_t *) b->data)[1] = axis1;
-        ((int32_t *) b->data)[2] = axis2;
-        ((int32_t *) b->data)[3] = axis3;
-
-        ggml_scratch_load(ctx);
-
-        result->src[2] = b;
-    }
+    int32_t params[] = { axis0, axis1, axis2, axis3 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     return result;
 }
@@ -6698,7 +6467,6 @@ struct ggml_tensor * ggml_transpose(
     result->op   = GGML_OP_TRANSPOSE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6776,7 +6544,6 @@ struct ggml_tensor * ggml_diag(
     result->op   = GGML_OP_DIAG;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6784,7 +6551,7 @@ struct ggml_tensor * ggml_diag(
 
 // ggml_diag_mask_inf
 
-struct ggml_tensor * ggml_diag_mask_inf_impl(
+static struct ggml_tensor * ggml_diag_mask_inf_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6797,19 +6564,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_INF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6831,7 +6591,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(
 
 // ggml_diag_mask_zero
 
-struct ggml_tensor * ggml_diag_mask_zero_impl(
+static struct ggml_tensor * ggml_diag_mask_zero_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6844,20 +6604,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-    ggml_set_name(b, "n_past, inplace");
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_ZERO;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -6878,7 +6630,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(
 
 // ggml_soft_max
 
-struct ggml_tensor * ggml_soft_max_impl(
+static struct ggml_tensor * ggml_soft_max_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         bool inplace) {
@@ -6893,7 +6645,6 @@ struct ggml_tensor * ggml_soft_max_impl(
     result->op   = GGML_OP_SOFT_MAX;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
 
     return result;
 }
@@ -6913,7 +6664,7 @@ struct ggml_tensor * ggml_soft_max_inplace(
 
 // ggml_soft_max_back
 
-struct ggml_tensor * ggml_soft_max_back_impl(
+static struct ggml_tensor * ggml_soft_max_back_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b,
@@ -6950,7 +6701,7 @@ struct ggml_tensor * ggml_soft_max_back_inplace(
 
 // ggml_rope
 
-struct ggml_tensor * ggml_rope_impl(
+static struct ggml_tensor * ggml_rope_impl(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         int n_past,
@@ -6969,23 +6720,14 @@ struct ggml_tensor * ggml_rope_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 6);
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_dims;
-    ((int32_t *) b->data)[2] = mode;
-    ((int32_t *) b->data)[3] = n_ctx;
-    memcpy((int32_t *) b->data + 4, &freq_base, sizeof(float));
-    memcpy((int32_t *) b->data + 5, &freq_scale, sizeof(float));
-
-    ggml_scratch_load(ctx);
+    int32_t params[6] = { n_past, n_dims, mode, n_ctx };
+    memcpy(params + 4, &freq_base, sizeof(float));
+    memcpy(params + 5, &freq_scale, sizeof(float));
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op   = GGML_OP_ROPE;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7010,6 +6752,18 @@ struct ggml_tensor * ggml_rope_inplace(
     return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
 }
 
+struct ggml_tensor * ggml_rope_custom(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        int                   n_past,
+        int                   n_dims,
+        int                   mode,
+        int                   n_ctx,
+        float                 freq_base,
+        float                 freq_scale) {
+    return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
+}
+
 struct ggml_tensor * ggml_rope_custom_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
@@ -7042,22 +6796,12 @@ struct ggml_tensor * ggml_rope_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
-    ggml_scratch_save(ctx);
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
-    ggml_set_name(b, "n_past, n_dims, mode");
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_dims;
-    ((int32_t *) b->data)[2] = mode;
-    ((int32_t *) b->data)[3] = n_ctx;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { n_past, n_dims, mode, n_ctx };
+    ggml_set_op_params(result, &params, sizeof(params));
 
     result->op   = GGML_OP_ROPE_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;
 
     return result;
 }
@@ -7082,21 +6826,13 @@ struct ggml_tensor * ggml_alibi(
     //struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);

-
-
-
-
-    ((int32_t *) b->data)[0] = n_past;
-    ((int32_t *) b->data)[1] = n_head;
-    GGML_ASSERT(sizeof(float) == sizeof(int32_t));
-    (((float *) b->data)[2]) = bias_max;
-
-    ggml_scratch_load(ctx);
+    int32_t op_params[3] = { n_past, n_head };
+    memcpy(op_params + 2, &bias_max, sizeof(float));
+    ggml_set_op_params(result, &op_params, sizeof(op_params));

     result->op = GGML_OP_ALIBI;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;

     return result;
 }
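Same idea for ALiBi: n_past and n_head go into the first two op_params slots and the float bias is memcpy'd into the third. A one-call usage sketch (the bias value is illustrative):

#include "ggml.h"

struct ggml_tensor * alibi_node(struct ggml_context * ctx, struct ggml_tensor * kq,
                                int n_past, int n_head) {
    // bias_max ends up type-punned into op_params[2] via memcpy.
    return ggml_alibi(ctx, kq, n_past, n_head, /*bias_max*/ 8.0f);
}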
@@ -7118,19 +6854,12 @@ struct ggml_tensor * ggml_clamp(
     // TODO: when implement backward, fix this:
     struct ggml_tensor * result = ggml_view_tensor(ctx, a);

-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
-
-    ((float *) b->data)[0] = min;
-    ((float *) b->data)[1] = max;
-
-    ggml_scratch_load(ctx);
+    float params[] = { min, max };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_CLAMP;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = b;

     return result;
 }
@@ -7163,18 +6892,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

-
-
-    ((int32_t*)c->data)[0] = s0;
-    ((int32_t*)c->data)[1] = p0;
-    ((int32_t*)c->data)[2] = d0;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { s0, p0, d0 };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_CONV_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;

     return result;
 }
@@ -7207,21 +6931,13 @@ struct ggml_tensor* ggml_conv_2d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

-
-
-    ((int32_t*)c->data)[0] = s0;
-    ((int32_t*)c->data)[1] = s1;
-    ((int32_t*)c->data)[2] = p0;
-    ((int32_t*)c->data)[3] = p1;
-    ((int32_t*)c->data)[4] = d0;
-    ((int32_t*)c->data)[5] = d1;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { s0, s1, p0, p1, d0, d1 };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_CONV_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = c;

     return result;

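Caller side, the stride/padding/dilation arguments are exactly what the hunks above pack into op_params. A sketch for the 1-D case, respecting the constraints asserted later in this diff (dilation 1, half padding); treat the argument order as an assumption to check against ggml.h of this version:

#include "ggml.h"

// 1-D convolution: 'kernel' is the filter tensor (src0), 'data' the input signal (src1).
// s0 = stride, p0 = padding, d0 = dilation; these three ints become dst->op_params.
struct ggml_tensor * conv1d_node(struct ggml_context * ctx,
                                 struct ggml_tensor * kernel,
                                 struct ggml_tensor * data) {
    return ggml_conv_1d(ctx, kernel, data,
                        /*s0*/ 1,
                        /*p0*/ (int)(kernel->ne[0] / 2), // only half padding is supported
                        /*d0*/ 1);                       // dilation must be 1
}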
@@ -7245,7 +6961,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
     return (ins + 2 * p - ks) / s + 1;
 }

-//
+// ggml_pool_1d

 struct ggml_tensor* ggml_pool_1d(
         struct ggml_context * ctx,
@@ -7268,18 +6984,12 @@ struct ggml_tensor* ggml_pool_1d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);

-
-
-    ((int32_t*)c->data)[0] = op;
-    ((int32_t*)c->data)[1] = k0;
-    ((int32_t*)c->data)[2] = s0;
-    ((int32_t*)c->data)[3] = p0;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { op, k0, s0, p0 };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_POOL_1D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = c;

     return result;
 }
@@ -7311,21 +7021,12 @@ struct ggml_tensor* ggml_pool_2d(
     };
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

-
-
-    ((int32_t*)c->data)[0] = op;
-    ((int32_t*)c->data)[1] = k0;
-    ((int32_t*)c->data)[2] = k1;
-    ((int32_t*)c->data)[3] = s0;
-    ((int32_t*)c->data)[4] = s1;
-    ((int32_t*)c->data)[5] = p0;
-    ((int32_t*)c->data)[6] = p1;
-    ggml_scratch_load(ctx);
+    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_POOL_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = c;

     return result;
 }
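For the pooling ops the packed fields are { op, kernel, stride, padding }. A usage sketch for 1-D average pooling; the parameter order is inferred from the hunk above, so verify it against ggml.h of this version:

#include "ggml.h"

// Average-pool a [width, channels] tensor with kernel 2, stride 2, no padding.
// The four ints end up in dst->op_params as { op, k0, s0, p0 }.
struct ggml_tensor * pool_node(struct ggml_context * ctx, struct ggml_tensor * x) {
    return ggml_pool_1d(ctx, x, GGML_OP_POOL_AVG, /*k0*/ 2, /*s0*/ 2, /*p0*/ 0);
}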
@@ -7348,14 +7049,16 @@ struct ggml_tensor * ggml_flash_attn(
     }

     //struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
+
+    int32_t t = masked ? 1 : 0;
+    ggml_set_op_params(result, &t, sizeof(t));

     result->op = GGML_OP_FLASH_ATTN;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = q;
     result->src[1] = k;
     result->src[2] = v;
-    result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);

     return result;
 }
@@ -7379,7 +7082,7 @@ struct ggml_tensor * ggml_flash_ff(
     }

     //struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
+    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);

     result->op = GGML_OP_FLASH_FF;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -7445,13 +7148,15 @@ struct ggml_tensor * ggml_flash_attn_back(

     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

+    int32_t masked_i = masked ? 1 : 0;
+    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
+
     result->op = GGML_OP_FLASH_ATTN_BACK;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = q;
     result->src[1] = k;
     result->src[2] = v;
     result->src[3] = d;
-    result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);

     return result;
 }
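The masked flag used to be carried by an extra I32 tensor in src[3]/src[4]; it is now a single int in op_params, so nothing extra gets allocated from the context. The caller side is unchanged; a sketch:

#include "ggml.h"

struct ggml_tensor * attn_node(struct ggml_context * ctx,
                               struct ggml_tensor * q,
                               struct ggml_tensor * k,
                               struct ggml_tensor * v) {
    // 'true' requests causal masking; internally this becomes op_params[0] = 1.
    return ggml_flash_attn(ctx, q, k, v, /*masked*/ true);
}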
@@ -7484,21 +7189,12 @@ struct ggml_tensor * ggml_win_part(

     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);

-
-
-    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
-
-    ((int32_t *) b->data)[0] = npx;
-    ((int32_t *) b->data)[1] = npy;
-    ((int32_t *) b->data)[2] = w;
-
-    ggml_scratch_load(ctx);
+    int32_t params[] = { npx, npy, w };
+    ggml_set_op_params(result, &params, sizeof(params));

     result->op = GGML_OP_WIN_PART;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = b;

     return result;
 }
@@ -7523,26 +7219,57 @@ struct ggml_tensor * ggml_win_unpart(
     const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
     struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

-
+    int32_t params[] = { w };
+    ggml_set_op_params(result, &params, sizeof(params));
+
+    result->op = GGML_OP_WIN_UNPART;
+    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+    result->src[0] = a;

-
+    return result;
+}

-
+// gmml_unary

-
+static struct ggml_tensor * ggml_unary_impl(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        enum ggml_unary_op op,
+        bool inplace) {
+    bool is_node = false;

-
+    if (!inplace && (a->grad)) {
+        is_node = true;
+    }
+
+    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+    ggml_set_op_params_i32(result, 0, (int32_t) op);
+
+    result->op = GGML_OP_UNARY;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-    result->src[1] = NULL;
-    result->src[2] = b;

     return result;
 }

+struct ggml_tensor * ggml_unary(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        enum ggml_unary_op op) {
+    return ggml_unary_impl(ctx, a, op, false);
+}
+
+struct ggml_tensor * ggml_unary_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor * a,
+        enum ggml_unary_op op) {
+    return ggml_unary_impl(ctx, a, op, true);
+}
+
 // ggml_map_unary

-struct ggml_tensor * ggml_map_unary_impl_f32(
+static struct ggml_tensor * ggml_map_unary_impl_f32(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         const ggml_unary_op_f32_t fun,
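GGML_OP_UNARY folds the former ABS/SGN/.../SILU ops into one op whose variant is stored in op_params slot 0 (see ggml_set_op_params_i32 above). A usage sketch; convenience wrappers such as ggml_relu or ggml_gelu presumably route through this same path now:

#include "ggml.h"

struct ggml_tensor * activation_node(struct ggml_context * ctx, struct ggml_tensor * x) {
    // Equivalent in spirit to the old dedicated GGML_OP_GELU node: the op kind is
    // recorded on the result tensor with ggml_set_op_params_i32(result, 0, op).
    return ggml_unary(ctx, x, GGML_UNARY_OP_GELU);
}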
@@ -7553,19 +7280,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7553
7280
|
is_node = true;
|
7554
7281
|
}
|
7555
7282
|
|
7556
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7557
|
-
|
7558
|
-
ggml_scratch_save(ctx);
|
7283
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7559
7284
|
|
7560
|
-
|
7561
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7562
|
-
|
7563
|
-
ggml_scratch_load(ctx);
|
7285
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7564
7286
|
|
7565
7287
|
result->op = GGML_OP_MAP_UNARY;
|
7566
7288
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7567
7289
|
result->src[0] = a;
|
7568
|
-
result->src[2] = addr_tensor;
|
7569
7290
|
|
7570
7291
|
return result;
|
7571
7292
|
}
|
@@ -7586,7 +7307,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7586
7307
|
|
7587
7308
|
// ggml_map_binary
|
7588
7309
|
|
7589
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7310
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7590
7311
|
struct ggml_context * ctx,
|
7591
7312
|
struct ggml_tensor * a,
|
7592
7313
|
struct ggml_tensor * b,
|
@@ -7600,20 +7321,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7600
7321
|
is_node = true;
|
7601
7322
|
}
|
7602
7323
|
|
7603
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7604
|
-
|
7605
|
-
ggml_scratch_save(ctx);
|
7606
|
-
|
7607
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7608
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7324
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7609
7325
|
|
7610
|
-
|
7326
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7611
7327
|
|
7612
7328
|
result->op = GGML_OP_MAP_BINARY;
|
7613
7329
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7614
7330
|
result->src[0] = a;
|
7615
7331
|
result->src[1] = b;
|
7616
|
-
result->src[2] = addr_tensor;
|
7617
7332
|
|
7618
7333
|
return result;
|
7619
7334
|
}
|
@@ -7636,7 +7351,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7636
7351
|
|
7637
7352
|
// ggml_map_custom1
|
7638
7353
|
|
7639
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7354
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7640
7355
|
struct ggml_context * ctx,
|
7641
7356
|
struct ggml_tensor * a,
|
7642
7357
|
const ggml_custom1_op_f32_t fun,
|
@@ -7647,19 +7362,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7647
7362
|
is_node = true;
|
7648
7363
|
}
|
7649
7364
|
|
7650
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7651
|
-
|
7652
|
-
ggml_scratch_save(ctx);
|
7653
|
-
|
7654
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7655
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7365
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7656
7366
|
|
7657
|
-
|
7367
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7658
7368
|
|
7659
7369
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7660
7370
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7661
7371
|
result->src[0] = a;
|
7662
|
-
result->src[2] = addr_tensor;
|
7663
7372
|
|
7664
7373
|
return result;
|
7665
7374
|
}
|
@@ -7680,7 +7389,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7680
7389
|
|
7681
7390
|
// ggml_map_custom2
|
7682
7391
|
|
7683
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7392
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7684
7393
|
struct ggml_context * ctx,
|
7685
7394
|
struct ggml_tensor * a,
|
7686
7395
|
struct ggml_tensor * b,
|
@@ -7692,20 +7401,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7692
7401
|
is_node = true;
|
7693
7402
|
}
|
7694
7403
|
|
7695
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7696
|
-
|
7697
|
-
ggml_scratch_save(ctx);
|
7404
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7698
7405
|
|
7699
|
-
|
7700
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7701
|
-
|
7702
|
-
ggml_scratch_load(ctx);
|
7406
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7703
7407
|
|
7704
7408
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7705
7409
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7706
7410
|
result->src[0] = a;
|
7707
7411
|
result->src[1] = b;
|
7708
|
-
result->src[2] = addr_tensor;
|
7709
7412
|
|
7710
7413
|
return result;
|
7711
7414
|
}
|
@@ -7728,7 +7431,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7728
7431
|
|
7729
7432
|
// ggml_map_custom3
|
7730
7433
|
|
7731
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7434
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7732
7435
|
struct ggml_context * ctx,
|
7733
7436
|
struct ggml_tensor * a,
|
7734
7437
|
struct ggml_tensor * b,
|
@@ -7741,21 +7444,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7741
7444
|
is_node = true;
|
7742
7445
|
}
|
7743
7446
|
|
7744
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7745
|
-
|
7746
|
-
ggml_scratch_save(ctx);
|
7747
|
-
|
7748
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7749
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7447
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7750
7448
|
|
7751
|
-
|
7449
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7752
7450
|
|
7753
7451
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7754
7452
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7755
7453
|
result->src[0] = a;
|
7756
7454
|
result->src[1] = b;
|
7757
|
-
result->src[2] =
|
7758
|
-
result->src[3] = c;
|
7455
|
+
result->src[2] = c;
|
7759
7456
|
|
7760
7457
|
return result;
|
7761
7458
|
}
|
@@ -8983,21 +8680,17 @@ static void ggml_compute_forward_acc_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));

-    GGML_ASSERT(opt0->type == GGML_TYPE_I32);
-    GGML_ASSERT(ggml_nelements(opt0) == 5);
-
     // view src0 and dst with these strides and data offset inbytes during acc
     // nb0 is implicitely element_size because src0 and dst are contiguous
-    size_t nb1 = ((int32_t *)
-    size_t nb2 = ((int32_t *)
-    size_t nb3 = ((int32_t *)
-    size_t offset = ((int32_t *)
-    bool inplace = (bool) ((int32_t *)
+    size_t nb1 = ((int32_t *) dst->op_params)[0];
+    size_t nb2 = ((int32_t *) dst->op_params)[1];
+    size_t nb3 = ((int32_t *) dst->op_params)[2];
+    size_t offset = ((int32_t *) dst->op_params)[3];
+    bool inplace = (bool) ((int32_t *) dst->op_params)[4];

     if (!inplace && (params->type == GGML_TASK_INIT)) {
         // memcpy needs to be synchronized across threads to avoid race conditions.
@@ -9066,13 +8759,12 @@ static void ggml_compute_forward_acc(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
         struct ggml_tensor * dst) {

     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_acc_f32(params, src0, src1,
+                ggml_compute_forward_acc_f32(params, src0, src1, dst);
             } break;
         case GGML_TYPE_F16:
         case GGML_TYPE_Q4_0:
@@ -9504,7 +9196,7 @@ static void ggml_compute_forward_sum_f32(
     for (int64_t i03 = 0; i03 < ne03; i03++) {
         for (int64_t i02 = 0; i02 < ne02; i02++) {
             for (int64_t i01 = 0; i01 < ne01; i01++) {
-
+                ggml_vec_sum_f32_ggf(ne00,
                         &row_sum,
                         (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
                 sum += row_sum;
@@ -9514,6 +9206,38 @@ static void ggml_compute_forward_sum_f32(
     ((float *) dst->data)[0] = sum;
 }

+static void ggml_compute_forward_sum_f16(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        struct ggml_tensor * dst) {
+    assert(params->ith == 0);
+    assert(ggml_is_scalar(dst));
+
+    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    assert(src0->nb[0] == sizeof(ggml_fp16_t));
+
+    GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+    GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
+
+    float sum = 0;
+    float row_sum = 0;
+
+    for (int64_t i03 = 0; i03 < ne03; i03++) {
+        for (int64_t i02 = 0; i02 < ne02; i02++) {
+            for (int64_t i01 = 0; i01 < ne01; i01++) {
+                ggml_vec_sum_f16_ggf(ne00,
+                    &row_sum,
+                    (ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
+                sum += row_sum;
+            }
+        }
+    }
+    ((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
+}
+
 static void ggml_compute_forward_sum(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
@@ -9523,6 +9247,10 @@ static void ggml_compute_forward_sum(
             {
                 ggml_compute_forward_sum_f32(params, src0, dst);
             } break;
+        case GGML_TYPE_F16:
+            {
+                ggml_compute_forward_sum_f16(params, src0, dst);
+            } break;
         default:
             {
                 GGML_ASSERT(false);
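ggml_vec_sum_f32_ggf accumulates in ggml_float (double) and the new F16 variant accumulates in float, i.e. one step wider than the element type, which keeps long row reductions from losing low-order bits. A standalone illustration of why the wider accumulator matters (plain C, not ggml code):

#include <stdio.h>

int main(void) {
    // Summing many small values: a float accumulator stalls once the running
    // total exceeds 2^24, a double accumulator (ggml_float in ggml) does not.
    float  acc_f32 = 0.0f;
    double acc_f64 = 0.0;
    for (int i = 0; i < 20000000; ++i) {
        acc_f32 += 1.0f;
        acc_f64 += 1.0f;
    }
    printf("float accumulator:  %.1f\n", acc_f32); // stops at 16777216.0
    printf("double accumulator: %.1f\n", acc_f64); // 20000000.0, exact
    return 0;
}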
@@ -10118,8 +9846,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10118
9846
|
const struct ggml_compute_params * params,
|
10119
9847
|
const struct ggml_tensor * src0,
|
10120
9848
|
struct ggml_tensor * dst) {
|
10121
|
-
GGML_ASSERT(
|
10122
|
-
GGML_ASSERT(
|
9849
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9850
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10123
9851
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10124
9852
|
|
10125
9853
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10177,8 +9905,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10177
9905
|
const struct ggml_compute_params * params,
|
10178
9906
|
const struct ggml_tensor * src0,
|
10179
9907
|
struct ggml_tensor * dst) {
|
10180
|
-
GGML_ASSERT(
|
10181
|
-
GGML_ASSERT(
|
9908
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9909
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10182
9910
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10183
9911
|
|
10184
9912
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10236,8 +9964,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10236
9964
|
const struct ggml_compute_params * params,
|
10237
9965
|
const struct ggml_tensor * src0,
|
10238
9966
|
struct ggml_tensor * dst) {
|
10239
|
-
GGML_ASSERT(
|
10240
|
-
GGML_ASSERT(
|
9967
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9968
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10241
9969
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10242
9970
|
|
10243
9971
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10289,7 +10017,6 @@ static void ggml_compute_forward_silu(
|
|
10289
10017
|
}
|
10290
10018
|
}
|
10291
10019
|
|
10292
|
-
|
10293
10020
|
// ggml_compute_forward_silu_back
|
10294
10021
|
|
10295
10022
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10297,9 +10024,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10297
10024
|
const struct ggml_tensor * src0,
|
10298
10025
|
const struct ggml_tensor * grad,
|
10299
10026
|
struct ggml_tensor * dst) {
|
10300
|
-
GGML_ASSERT(
|
10301
|
-
GGML_ASSERT(
|
10302
|
-
GGML_ASSERT(
|
10027
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10028
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10029
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10303
10030
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10304
10031
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10305
10032
|
|
@@ -10439,7 +10166,8 @@ static void ggml_compute_forward_rms_norm_f32(

     GGML_TENSOR_UNARY_OP_LOCALS;

-
+    float eps;
+    memcpy(&eps, dst->op_params, sizeof(float));

     // TODO: optimize
     for (int64_t i03 = 0; i03 < ne03; i03++) {
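The epsilon is now read from dst->op_params instead of being hard-coded in this kernel. For reference, the row computation itself is unchanged; a compact standalone version of RMS-norm over one row (illustrative, not the ggml implementation):

#include <math.h>
#include <stddef.h>

// y[i] = x[i] / sqrt(mean(x^2) + eps)
static void rms_norm_row(const float * x, float * y, size_t n, float eps) {
    double sum_sq = 0.0;                       // accumulate squares in double
    for (size_t i = 0; i < n; ++i) {
        sum_sq += (double) x[i] * x[i];
    }
    const float scale = 1.0f / sqrtf((float)(sum_sq / n) + eps);
    for (size_t i = 0; i < n; ++i) {
        y[i] = x[i] * scale;
    }
}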
@@ -11092,21 +10820,17 @@ static void ggml_compute_forward_set_f32(
|
|
11092
10820
|
const struct ggml_compute_params * params,
|
11093
10821
|
const struct ggml_tensor * src0,
|
11094
10822
|
const struct ggml_tensor * src1,
|
11095
|
-
const struct ggml_tensor * opt0,
|
11096
10823
|
struct ggml_tensor * dst) {
|
11097
10824
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11098
10825
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11099
10826
|
|
11100
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11101
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11102
|
-
|
11103
10827
|
// view src0 and dst with these strides and data offset inbytes during set
|
11104
10828
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
11105
|
-
size_t nb1 = ((int32_t *)
|
11106
|
-
size_t nb2 = ((int32_t *)
|
11107
|
-
size_t nb3 = ((int32_t *)
|
11108
|
-
size_t offset = ((int32_t *)
|
11109
|
-
bool inplace = (bool) ((int32_t *)
|
10829
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10830
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10831
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10832
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10833
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11110
10834
|
|
11111
10835
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11112
10836
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11166,13 +10890,12 @@ static void ggml_compute_forward_set(
|
|
11166
10890
|
const struct ggml_compute_params * params,
|
11167
10891
|
const struct ggml_tensor * src0,
|
11168
10892
|
const struct ggml_tensor * src1,
|
11169
|
-
const struct ggml_tensor * opt0,
|
11170
10893
|
struct ggml_tensor * dst) {
|
11171
10894
|
|
11172
10895
|
switch (src0->type) {
|
11173
10896
|
case GGML_TYPE_F32:
|
11174
10897
|
{
|
11175
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10898
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11176
10899
|
} break;
|
11177
10900
|
case GGML_TYPE_F16:
|
11178
10901
|
case GGML_TYPE_Q4_0:
|
@@ -11568,17 +11291,14 @@ static void ggml_compute_forward_diag(
|
|
11568
11291
|
static void ggml_compute_forward_diag_mask_f32(
|
11569
11292
|
const struct ggml_compute_params * params,
|
11570
11293
|
const struct ggml_tensor * src0,
|
11571
|
-
const struct ggml_tensor * src1,
|
11572
11294
|
struct ggml_tensor * dst,
|
11573
11295
|
const float value) {
|
11574
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11575
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11576
11296
|
|
11577
11297
|
const int ith = params->ith;
|
11578
11298
|
const int nth = params->nth;
|
11579
11299
|
|
11580
|
-
const int n_past = ((int32_t *)
|
11581
|
-
const bool inplace = (bool)((int32_t *)
|
11300
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11301
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11582
11302
|
|
11583
11303
|
GGML_ASSERT(n_past >= 0);
|
11584
11304
|
|
@@ -11621,12 +11341,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11621
11341
|
static void ggml_compute_forward_diag_mask_inf(
|
11622
11342
|
const struct ggml_compute_params * params,
|
11623
11343
|
const struct ggml_tensor * src0,
|
11624
|
-
const struct ggml_tensor * src1,
|
11625
11344
|
struct ggml_tensor * dst) {
|
11626
11345
|
switch (src0->type) {
|
11627
11346
|
case GGML_TYPE_F32:
|
11628
11347
|
{
|
11629
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11348
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11630
11349
|
} break;
|
11631
11350
|
default:
|
11632
11351
|
{
|
@@ -11638,12 +11357,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11638
11357
|
static void ggml_compute_forward_diag_mask_zero(
|
11639
11358
|
const struct ggml_compute_params * params,
|
11640
11359
|
const struct ggml_tensor * src0,
|
11641
|
-
const struct ggml_tensor * src1,
|
11642
11360
|
struct ggml_tensor * dst) {
|
11643
11361
|
switch (src0->type) {
|
11644
11362
|
case GGML_TYPE_F32:
|
11645
11363
|
{
|
11646
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11364
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11647
11365
|
} break;
|
11648
11366
|
default:
|
11649
11367
|
{
|
@@ -11841,20 +11559,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11841
11559
|
static void ggml_compute_forward_alibi_f32(
|
11842
11560
|
const struct ggml_compute_params * params,
|
11843
11561
|
const struct ggml_tensor * src0,
|
11844
|
-
const struct ggml_tensor * src1,
|
11845
11562
|
struct ggml_tensor * dst) {
|
11846
11563
|
assert(params->ith == 0);
|
11847
11564
|
|
11848
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11849
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11850
|
-
|
11851
11565
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11852
11566
|
return;
|
11853
11567
|
}
|
11854
11568
|
|
11855
|
-
const int
|
11856
|
-
const int
|
11857
|
-
|
11569
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11570
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11571
|
+
float max_bias;
|
11572
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11858
11573
|
|
11859
11574
|
assert(n_past >= 0);
|
11860
11575
|
|
@@ -11907,20 +11622,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11907
11622
|
static void ggml_compute_forward_alibi_f16(
|
11908
11623
|
const struct ggml_compute_params * params,
|
11909
11624
|
const struct ggml_tensor * src0,
|
11910
|
-
const struct ggml_tensor * src1,
|
11911
11625
|
struct ggml_tensor * dst) {
|
11912
11626
|
assert(params->ith == 0);
|
11913
11627
|
|
11914
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11915
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11916
|
-
|
11917
11628
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11918
11629
|
return;
|
11919
11630
|
}
|
11920
11631
|
|
11921
|
-
const int
|
11922
|
-
const int
|
11923
|
-
|
11632
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11633
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11634
|
+
float max_bias;
|
11635
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11924
11636
|
|
11925
11637
|
assert(n_past >= 0);
|
11926
11638
|
|
@@ -11973,16 +11685,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11973
11685
|
static void ggml_compute_forward_alibi(
|
11974
11686
|
const struct ggml_compute_params * params,
|
11975
11687
|
const struct ggml_tensor * src0,
|
11976
|
-
const struct ggml_tensor * src1,
|
11977
11688
|
struct ggml_tensor * dst) {
|
11978
11689
|
switch (src0->type) {
|
11979
11690
|
case GGML_TYPE_F16:
|
11980
11691
|
{
|
11981
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11692
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11982
11693
|
} break;
|
11983
11694
|
case GGML_TYPE_F32:
|
11984
11695
|
{
|
11985
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11696
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11986
11697
|
} break;
|
11987
11698
|
case GGML_TYPE_Q4_0:
|
11988
11699
|
case GGML_TYPE_Q4_1:
|
@@ -12012,19 +11723,17 @@ static void ggml_compute_forward_alibi(
|
|
12012
11723
|
static void ggml_compute_forward_clamp_f32(
|
12013
11724
|
const struct ggml_compute_params * params,
|
12014
11725
|
const struct ggml_tensor * src0,
|
12015
|
-
const struct ggml_tensor * src1,
|
12016
11726
|
struct ggml_tensor * dst) {
|
12017
11727
|
assert(params->ith == 0);
|
12018
11728
|
|
12019
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
12020
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
12021
|
-
|
12022
11729
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12023
11730
|
return;
|
12024
11731
|
}
|
12025
11732
|
|
12026
|
-
|
12027
|
-
|
11733
|
+
float min;
|
11734
|
+
float max;
|
11735
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11736
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
12028
11737
|
|
12029
11738
|
const int ith = params->ith;
|
12030
11739
|
const int nth = params->nth;
|
@@ -12054,12 +11763,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12054
11763
|
static void ggml_compute_forward_clamp(
|
12055
11764
|
const struct ggml_compute_params * params,
|
12056
11765
|
const struct ggml_tensor * src0,
|
12057
|
-
const struct ggml_tensor * src1,
|
12058
11766
|
struct ggml_tensor * dst) {
|
12059
11767
|
switch (src0->type) {
|
12060
11768
|
case GGML_TYPE_F32:
|
12061
11769
|
{
|
12062
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11770
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12063
11771
|
} break;
|
12064
11772
|
case GGML_TYPE_F16:
|
12065
11773
|
case GGML_TYPE_Q4_0:
|
@@ -12089,10 +11797,7 @@ static void ggml_compute_forward_clamp(
|
|
12089
11797
|
static void ggml_compute_forward_rope_f32(
|
12090
11798
|
const struct ggml_compute_params * params,
|
12091
11799
|
const struct ggml_tensor * src0,
|
12092
|
-
const struct ggml_tensor * src1,
|
12093
11800
|
struct ggml_tensor * dst) {
|
12094
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12095
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12096
11801
|
|
12097
11802
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12098
11803
|
return;
|
@@ -12101,12 +11806,12 @@ static void ggml_compute_forward_rope_f32(
|
|
12101
11806
|
float freq_base;
|
12102
11807
|
float freq_scale;
|
12103
11808
|
|
12104
|
-
const int n_past = ((int32_t *)
|
12105
|
-
const int n_dims = ((int32_t *)
|
12106
|
-
const int mode = ((int32_t *)
|
12107
|
-
const int n_ctx = ((int32_t *)
|
12108
|
-
memcpy(&freq_base, (int32_t *)
|
12109
|
-
memcpy(&freq_scale, (int32_t *)
|
11809
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11810
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11811
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11812
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11813
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11814
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12110
11815
|
|
12111
11816
|
assert(n_past >= 0);
|
12112
11817
|
|
@@ -12221,10 +11926,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12221
11926
|
static void ggml_compute_forward_rope_f16(
|
12222
11927
|
const struct ggml_compute_params * params,
|
12223
11928
|
const struct ggml_tensor * src0,
|
12224
|
-
const struct ggml_tensor * src1,
|
12225
11929
|
struct ggml_tensor * dst) {
|
12226
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12227
|
-
GGML_ASSERT(ggml_nelements(src1) == 6);
|
12228
11930
|
|
12229
11931
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12230
11932
|
return;
|
@@ -12233,12 +11935,12 @@ static void ggml_compute_forward_rope_f16(
|
|
12233
11935
|
float freq_base;
|
12234
11936
|
float freq_scale;
|
12235
11937
|
|
12236
|
-
const int n_past = ((int32_t *)
|
12237
|
-
const int n_dims = ((int32_t *)
|
12238
|
-
const int mode = ((int32_t *)
|
12239
|
-
const int n_ctx = ((int32_t *)
|
12240
|
-
memcpy(&freq_base, (int32_t *)
|
12241
|
-
memcpy(&freq_scale, (int32_t *)
|
11938
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11939
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11940
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11941
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11942
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11943
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12242
11944
|
|
12243
11945
|
assert(n_past >= 0);
|
12244
11946
|
|
@@ -12353,16 +12055,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12353
12055
|
static void ggml_compute_forward_rope(
|
12354
12056
|
const struct ggml_compute_params * params,
|
12355
12057
|
const struct ggml_tensor * src0,
|
12356
|
-
const struct ggml_tensor * src1,
|
12357
12058
|
struct ggml_tensor * dst) {
|
12358
12059
|
switch (src0->type) {
|
12359
12060
|
case GGML_TYPE_F16:
|
12360
12061
|
{
|
12361
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12062
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12362
12063
|
} break;
|
12363
12064
|
case GGML_TYPE_F32:
|
12364
12065
|
{
|
12365
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12066
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12366
12067
|
} break;
|
12367
12068
|
default:
|
12368
12069
|
{
|
@@ -12376,10 +12077,7 @@ static void ggml_compute_forward_rope(
|
|
12376
12077
|
static void ggml_compute_forward_rope_back_f32(
|
12377
12078
|
const struct ggml_compute_params * params,
|
12378
12079
|
const struct ggml_tensor * src0,
|
12379
|
-
const struct ggml_tensor * src1,
|
12380
12080
|
struct ggml_tensor * dst) {
|
12381
|
-
assert(src1->type == GGML_TYPE_I32);
|
12382
|
-
assert(ggml_nelements(src1) == 4);
|
12383
12081
|
|
12384
12082
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12385
12083
|
return;
|
@@ -12389,9 +12087,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12389
12087
|
// dx = rope_back(dy, src1)
|
12390
12088
|
// src0 is dy, src1 contains options
|
12391
12089
|
|
12392
|
-
const int n_past = ((int32_t *)
|
12393
|
-
const int n_dims = ((int32_t *)
|
12394
|
-
const int mode = ((int32_t *)
|
12090
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12091
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12092
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12395
12093
|
|
12396
12094
|
assert(n_past >= 0);
|
12397
12095
|
|
@@ -12475,10 +12173,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12475
12173
|
static void ggml_compute_forward_rope_back_f16(
|
12476
12174
|
const struct ggml_compute_params * params,
|
12477
12175
|
const struct ggml_tensor * src0,
|
12478
|
-
const struct ggml_tensor * src1,
|
12479
12176
|
struct ggml_tensor * dst) {
|
12480
|
-
assert(src1->type == GGML_TYPE_I32);
|
12481
|
-
assert(ggml_nelements(src1) == 3);
|
12482
12177
|
|
12483
12178
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12484
12179
|
return;
|
@@ -12488,9 +12183,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12488
12183
|
// dx = rope_back(dy, src1)
|
12489
12184
|
// src0 is dy, src1 contains options
|
12490
12185
|
|
12491
|
-
const int n_past = ((int32_t *)
|
12492
|
-
const int n_dims = ((int32_t *)
|
12493
|
-
const int mode = ((int32_t *)
|
12186
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12187
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12188
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12494
12189
|
|
12495
12190
|
assert(n_past >= 0);
|
12496
12191
|
|
@@ -12574,16 +12269,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12574
12269
|
static void ggml_compute_forward_rope_back(
|
12575
12270
|
const struct ggml_compute_params * params,
|
12576
12271
|
const struct ggml_tensor * src0,
|
12577
|
-
const struct ggml_tensor * src1,
|
12578
12272
|
struct ggml_tensor * dst) {
|
12579
12273
|
switch (src0->type) {
|
12580
12274
|
case GGML_TYPE_F16:
|
12581
12275
|
{
|
12582
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12276
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12583
12277
|
} break;
|
12584
12278
|
case GGML_TYPE_F32:
|
12585
12279
|
{
|
12586
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12280
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12587
12281
|
} break;
|
12588
12282
|
default:
|
12589
12283
|
{
|
@@ -12780,7 +12474,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12780
12474
|
const struct ggml_compute_params * params,
|
12781
12475
|
const struct ggml_tensor * src0,
|
12782
12476
|
const struct ggml_tensor * src1,
|
12783
|
-
|
12477
|
+
struct ggml_tensor * dst) {
|
12784
12478
|
switch (src0->type) {
|
12785
12479
|
case GGML_TYPE_F16:
|
12786
12480
|
{
|
@@ -12983,7 +12677,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12983
12677
|
const struct ggml_compute_params * params,
|
12984
12678
|
const struct ggml_tensor * src0,
|
12985
12679
|
const struct ggml_tensor * src1,
|
12986
|
-
|
12680
|
+
struct ggml_tensor * dst) {
|
12987
12681
|
switch (src0->type) {
|
12988
12682
|
case GGML_TYPE_F16:
|
12989
12683
|
{
|
@@ -13003,14 +12697,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
13003
12697
|
// ggml_compute_forward_conv_1d
|
13004
12698
|
|
13005
12699
|
static void ggml_compute_forward_conv_1d(
|
13006
|
-
|
13007
|
-
|
13008
|
-
|
13009
|
-
|
13010
|
-
|
13011
|
-
const int32_t
|
13012
|
-
const int32_t
|
13013
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12700
|
+
const struct ggml_compute_params * params,
|
12701
|
+
const struct ggml_tensor * src0,
|
12702
|
+
const struct ggml_tensor * src1,
|
12703
|
+
struct ggml_tensor * dst) {
|
12704
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12705
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12706
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
13014
12707
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
13015
12708
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
13016
12709
|
if (s0 == 1) {
|
@@ -13028,7 +12721,6 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13028
12721
|
const struct ggml_compute_params * params,
|
13029
12722
|
const struct ggml_tensor * src0,
|
13030
12723
|
const struct ggml_tensor * src1,
|
13031
|
-
const struct ggml_tensor * opt0,
|
13032
12724
|
struct ggml_tensor * dst) {
|
13033
12725
|
GGML_ASSERT(src0->type == GGML_TYPE_F16);
|
13034
12726
|
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
@@ -13048,12 +12740,12 @@ static void ggml_compute_forward_conv_2d_f16_f32(
|
|
13048
12740
|
// size of the convolution row - the kernel size unrolled across all channels
|
13049
12741
|
const int ew0 = nk0*nk1*ne02;
|
13050
12742
|
|
13051
|
-
const int32_t s0 = ((const int32_t*)(
|
13052
|
-
const int32_t s1 = ((const int32_t*)(
|
13053
|
-
const int32_t p0 = ((const int32_t*)(
|
13054
|
-
const int32_t p1 = ((const int32_t*)(
|
13055
|
-
const int32_t d0 = ((const int32_t*)(
|
13056
|
-
const int32_t d1 = ((const int32_t*)(
|
12743
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12744
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12745
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12746
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12747
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12748
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
13057
12749
|
|
13058
12750
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13059
12751
|
GGML_ASSERT(nb10 == sizeof(float));
|
@@ -13125,17 +12817,15 @@ static void ggml_compute_forward_conv_2d(
|
|
13125
12817
|
const struct ggml_compute_params * params,
|
13126
12818
|
const struct ggml_tensor * src0,
|
13127
12819
|
const struct ggml_tensor * src1,
|
13128
|
-
|
13129
|
-
struct ggml_tensor * dst
|
13130
|
-
) {
|
12820
|
+
struct ggml_tensor * dst) {
|
13131
12821
|
switch (src0->type) {
|
13132
12822
|
case GGML_TYPE_F16:
|
13133
12823
|
{
|
13134
|
-
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1,
|
12824
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13135
12825
|
} break;
|
13136
12826
|
case GGML_TYPE_F32:
|
13137
12827
|
{
|
13138
|
-
//ggml_compute_forward_conv_2d_f32(params, src0, src1,
|
12828
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13139
12829
|
GGML_ASSERT(false);
|
13140
12830
|
} break;
|
13141
12831
|
default:
|
@@ -13200,12 +12890,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13200
12890
|
// ggml_compute_forward_pool_1d
|
13201
12891
|
|
13202
12892
|
static void ggml_compute_forward_pool_1d(
|
13203
|
-
|
13204
|
-
|
13205
|
-
|
13206
|
-
|
13207
|
-
|
13208
|
-
const int* opts = (const int*)opt0->data;
|
12893
|
+
const struct ggml_compute_params * params,
|
12894
|
+
const struct ggml_tensor * src0,
|
12895
|
+
struct ggml_tensor * dst) {
|
12896
|
+
|
12897
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13209
12898
|
enum ggml_op_pool op = opts[0];
|
13210
12899
|
const int k0 = opts[1];
|
13211
12900
|
const int s0 = opts[2];
|
@@ -13219,12 +12908,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13219
12908
|
// ggml_compute_forward_pool_2d_sk_p0
|
13220
12909
|
|
13221
12910
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13222
|
-
|
13223
|
-
|
13224
|
-
|
13225
|
-
|
13226
|
-
|
13227
|
-
|
12911
|
+
const struct ggml_compute_params * params,
|
12912
|
+
const enum ggml_op_pool op,
|
12913
|
+
const struct ggml_tensor * src,
|
12914
|
+
const int k0,
|
12915
|
+
const int k1,
|
12916
|
+
struct ggml_tensor * dst) {
|
13228
12917
|
assert(src->type == GGML_TYPE_F32);
|
13229
12918
|
assert(params->ith == 0);
|
13230
12919
|
|
@@ -13284,12 +12973,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13284
12973
|
// ggml_compute_forward_pool_2d
|
13285
12974
|
|
13286
12975
|
static void ggml_compute_forward_pool_2d(
|
13287
|
-
|
13288
|
-
|
13289
|
-
|
13290
|
-
|
13291
|
-
|
13292
|
-
const int* opts = (const int*)opt0->data;
|
12976
|
+
const struct ggml_compute_params * params,
|
12977
|
+
const struct ggml_tensor * src0,
|
12978
|
+
struct ggml_tensor * dst) {
|
12979
|
+
|
12980
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13293
12981
|
enum ggml_op_pool op = opts[0];
|
13294
12982
|
const int k0 = opts[1];
|
13295
12983
|
const int k1 = opts[2];
|
@@ -13314,7 +13002,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13314
13002
|
const struct ggml_tensor * k,
|
13315
13003
|
const struct ggml_tensor * v,
|
13316
13004
|
const bool masked,
|
13317
|
-
|
13005
|
+
struct ggml_tensor * dst) {
|
13318
13006
|
int64_t t0 = ggml_perf_time_us();
|
13319
13007
|
UNUSED(t0);
|
13320
13008
|
|
@@ -13492,7 +13180,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13492
13180
|
const struct ggml_tensor * k,
|
13493
13181
|
const struct ggml_tensor * v,
|
13494
13182
|
const bool masked,
|
13495
|
-
|
13183
|
+
struct ggml_tensor * dst) {
|
13496
13184
|
int64_t t0 = ggml_perf_time_us();
|
13497
13185
|
UNUSED(t0);
|
13498
13186
|
|
@@ -14257,7 +13945,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14257
13945
|
static void ggml_compute_forward_win_part_f32(
|
14258
13946
|
const struct ggml_compute_params * params,
|
14259
13947
|
const struct ggml_tensor * src0,
|
14260
|
-
const struct ggml_tensor * opt0,
|
14261
13948
|
struct ggml_tensor * dst) {
|
14262
13949
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14263
13950
|
return;
|
@@ -14266,9 +13953,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14266
13953
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14267
13954
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14268
13955
|
|
14269
|
-
const int32_t nep0 = ((const int32_t *)(
|
14270
|
-
const int32_t nep1 = ((const int32_t *)(
|
14271
|
-
const int32_t w = ((const int32_t *)(
|
13956
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13957
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13958
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14272
13959
|
|
14273
13960
|
assert(ne00 == ne0);
|
14274
13961
|
assert(ne3 == nep0*nep1);
|
@@ -14302,12 +13989,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14302
13989
|
static void ggml_compute_forward_win_part(
|
14303
13990
|
const struct ggml_compute_params * params,
|
14304
13991
|
const struct ggml_tensor * src0,
|
14305
|
-
const struct ggml_tensor * opt0,
|
14306
13992
|
struct ggml_tensor * dst) {
|
14307
13993
|
switch (src0->type) {
|
14308
13994
|
case GGML_TYPE_F32:
|
14309
13995
|
{
|
14310
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13996
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14311
13997
|
} break;
|
14312
13998
|
default:
|
14313
13999
|
{
|
@@ -14321,7 +14007,6 @@ static void ggml_compute_forward_win_part(
|
|
14321
14007
|
static void ggml_compute_forward_win_unpart_f32(
|
14322
14008
|
const struct ggml_compute_params * params,
|
14323
14009
|
const struct ggml_tensor * src0,
|
14324
|
-
const struct ggml_tensor * opt0,
|
14325
14010
|
struct ggml_tensor * dst) {
|
14326
14011
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14327
14012
|
return;
|
@@ -14330,7 +14015,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14330
14015
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14331
14016
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14332
14017
|
|
14333
|
-
const int32_t w = ((const int32_t *)(
|
14018
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14334
14019
|
|
14335
14020
|
// padding
|
14336
14021
|
const int px = (w - ne1%w)%w;
|
@@ -14364,12 +14049,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14364
14049
|
static void ggml_compute_forward_win_unpart(
|
14365
14050
|
const struct ggml_compute_params * params,
|
14366
14051
|
const struct ggml_tensor * src0,
|
14367
|
-
const struct ggml_tensor * opt0,
|
14368
14052
|
struct ggml_tensor * dst) {
|
14369
14053
|
switch (src0->type) {
|
14370
14054
|
case GGML_TYPE_F32:
|
14371
14055
|
{
|
14372
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14056
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14057
|
+
} break;
|
14058
|
+
default:
|
14059
|
+
{
|
14060
|
+
GGML_ASSERT(false);
|
14061
|
+
} break;
|
14062
|
+
}
|
14063
|
+
}
|
14064
|
+
|
14065
|
+
//gmml_compute_forward_unary
|
14066
|
+
|
14067
|
+
static void ggml_compute_forward_unary(
|
14068
|
+
const struct ggml_compute_params * params,
|
14069
|
+
const struct ggml_tensor * src0,
|
14070
|
+
struct ggml_tensor * dst) {
|
14071
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14072
|
+
|
14073
|
+
switch (op) {
|
14074
|
+
case GGML_UNARY_OP_ABS:
|
14075
|
+
{
|
14076
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14077
|
+
} break;
|
14078
|
+
case GGML_UNARY_OP_SGN:
|
14079
|
+
{
|
14080
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14081
|
+
} break;
|
14082
|
+
case GGML_UNARY_OP_NEG:
|
14083
|
+
{
|
14084
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14085
|
+
} break;
|
14086
|
+
case GGML_UNARY_OP_STEP:
|
14087
|
+
{
|
14088
|
+
ggml_compute_forward_step(params, src0, dst);
|
14089
|
+
} break;
|
14090
|
+
case GGML_UNARY_OP_TANH:
|
14091
|
+
{
|
14092
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14093
|
+
} break;
|
14094
|
+
case GGML_UNARY_OP_ELU:
|
14095
|
+
{
|
14096
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14097
|
+
} break;
|
14098
|
+
case GGML_UNARY_OP_RELU:
|
14099
|
+
{
|
14100
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14101
|
+
} break;
|
14102
|
+
case GGML_UNARY_OP_GELU:
|
14103
|
+
{
|
14104
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14105
|
+
} break;
|
14106
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14107
|
+
{
|
14108
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14109
|
+
} break;
|
14110
|
+
case GGML_UNARY_OP_SILU:
|
14111
|
+
{
|
14112
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14373
14113
|
} break;
|
14374
14114
|
default:
|
14375
14115
|
{
|
@@ -14888,7 +14628,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14888
14628
|
} break;
|
14889
14629
|
case GGML_OP_ACC:
|
14890
14630
|
{
|
14891
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14631
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14892
14632
|
} break;
|
14893
14633
|
case GGML_OP_SUB:
|
14894
14634
|
{
|
@@ -14938,46 +14678,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14938
14678
|
{
|
14939
14679
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14940
14680
|
} break;
|
14941
|
-
case GGML_OP_ABS:
|
14942
|
-
{
|
14943
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14944
|
-
} break;
|
14945
|
-
case GGML_OP_SGN:
|
14946
|
-
{
|
14947
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14948
|
-
} break;
|
14949
|
-
case GGML_OP_NEG:
|
14950
|
-
{
|
14951
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14952
|
-
} break;
|
14953
|
-
case GGML_OP_STEP:
|
14954
|
-
{
|
14955
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14956
|
-
} break;
|
14957
|
-
case GGML_OP_TANH:
|
14958
|
-
{
|
14959
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14960
|
-
} break;
|
14961
|
-
case GGML_OP_ELU:
|
14962
|
-
{
|
14963
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14964
|
-
} break;
|
14965
|
-
case GGML_OP_RELU:
|
14966
|
-
{
|
14967
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14968
|
-
} break;
|
14969
|
-
case GGML_OP_GELU:
|
14970
|
-
{
|
14971
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14972
|
-
} break;
|
14973
|
-
case GGML_OP_GELU_QUICK:
|
14974
|
-
{
|
14975
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14976
|
-
} break;
|
14977
|
-
case GGML_OP_SILU:
|
14978
|
-
{
|
14979
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14980
|
-
} break;
|
14981
14681
|
case GGML_OP_SILU_BACK:
|
14982
14682
|
{
|
14983
14683
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -15008,7 +14708,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15008
14708
|
} break;
|
15009
14709
|
case GGML_OP_SET:
|
15010
14710
|
{
|
15011
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14711
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
15012
14712
|
} break;
|
15013
14713
|
case GGML_OP_CPY:
|
15014
14714
|
{
|
@@ -15048,11 +14748,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15048
14748
|
} break;
|
15049
14749
|
case GGML_OP_DIAG_MASK_INF:
|
15050
14750
|
{
|
15051
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14751
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15052
14752
|
} break;
|
15053
14753
|
case GGML_OP_DIAG_MASK_ZERO:
|
15054
14754
|
{
|
15055
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14755
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15056
14756
|
} break;
|
15057
14757
|
case GGML_OP_SOFT_MAX:
|
15058
14758
|
{
|
@@ -15064,39 +14764,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15064
14764
|
} break;
|
15065
14765
|
case GGML_OP_ROPE:
|
15066
14766
|
{
|
15067
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14767
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15068
14768
|
} break;
|
15069
14769
|
case GGML_OP_ROPE_BACK:
|
15070
14770
|
{
|
15071
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14771
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15072
14772
|
} break;
|
15073
14773
|
case GGML_OP_ALIBI:
|
15074
14774
|
{
|
15075
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14775
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15076
14776
|
} break;
|
15077
14777
|
case GGML_OP_CLAMP:
|
15078
14778
|
{
|
15079
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14779
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15080
14780
|
} break;
|
15081
14781
|
case GGML_OP_CONV_1D:
|
15082
14782
|
{
|
15083
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14783
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15084
14784
|
} break;
|
15085
14785
|
case GGML_OP_CONV_2D:
|
15086
14786
|
{
|
15087
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14787
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15088
14788
|
} break;
|
15089
14789
|
case GGML_OP_POOL_1D:
|
15090
14790
|
{
|
15091
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14791
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15092
14792
|
} break;
|
15093
14793
|
case GGML_OP_POOL_2D:
|
15094
14794
|
{
|
15095
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14795
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15096
14796
|
} break;
|
15097
14797
|
case GGML_OP_FLASH_ATTN:
|
15098
14798
|
{
|
15099
|
-
const int32_t t =
|
14799
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15100
14800
|
GGML_ASSERT(t == 0 || t == 1);
|
15101
14801
|
const bool masked = t != 0;
|
15102
14802
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15107,47 +14807,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15107
14807
|
} break;
|
15108
14808
|
case GGML_OP_FLASH_ATTN_BACK:
|
15109
14809
|
{
|
15110
|
-
int32_t t =
|
14810
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15111
14811
|
GGML_ASSERT(t == 0 || t == 1);
|
15112
14812
|
bool masked = t != 0;
|
15113
14813
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15114
14814
|
} break;
|
15115
14815
|
case GGML_OP_WIN_PART:
|
15116
14816
|
{
|
15117
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14817
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15118
14818
|
} break;
|
15119
14819
|
case GGML_OP_WIN_UNPART:
|
15120
14820
|
{
|
15121
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14821
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14822
|
+
} break;
|
14823
|
+
case GGML_OP_UNARY:
|
14824
|
+
{
|
14825
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15122
14826
|
} break;
|
15123
14827
|
case GGML_OP_MAP_UNARY:
|
15124
14828
|
{
|
15125
|
-
|
14829
|
+
ggml_unary_op_f32_t fun;
|
14830
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15126
14831
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15127
14832
|
}
|
15128
14833
|
break;
|
15129
14834
|
case GGML_OP_MAP_BINARY:
|
15130
14835
|
{
|
15131
|
-
|
14836
|
+
ggml_binary_op_f32_t fun;
|
14837
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15132
14838
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15133
14839
|
}
|
15134
14840
|
break;
|
15135
14841
|
case GGML_OP_MAP_CUSTOM1:
|
15136
14842
|
{
|
15137
|
-
|
14843
|
+
ggml_custom1_op_f32_t fun;
|
14844
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15138
14845
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15139
14846
|
}
|
15140
14847
|
break;
|
15141
14848
|
case GGML_OP_MAP_CUSTOM2:
|
15142
14849
|
{
|
15143
|
-
|
14850
|
+
ggml_custom2_op_f32_t fun;
|
14851
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15144
14852
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15145
14853
|
}
|
15146
14854
|
break;
|
15147
14855
|
case GGML_OP_MAP_CUSTOM3:
|
15148
14856
|
{
|
15149
|
-
|
15150
|
-
|
14857
|
+
ggml_custom3_op_f32_t fun;
|
14858
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14859
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15151
14860
|
}
|
15152
14861
|
break;
|
15153
14862
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
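The MAP_* cases above now recover the user callback by `memcpy`-ing a function pointer out of `tensor->op_params` instead of dereferencing an opaque source tensor. A self-contained sketch of that round trip; only the copy-through-a-byte-buffer technique is taken from the hunk, the names are illustrative:

```c
#include <stdio.h>
#include <string.h>

typedef void (*unary_op_f32_t)(int n, float * dst, const float * src);

static void neg_f32(int n, float * dst, const float * src) {
    for (int i = 0; i < n; ++i) dst[i] = -src[i];
}

int main(void) {
    // op_params is just a small byte buffer attached to the op node
    char op_params[32] = {0};

    // build step: stash the callback; function pointers are copied in and out
    // as raw bytes rather than cast, which keeps the code strictly conforming
    unary_op_f32_t fun = neg_f32;
    memcpy(op_params, &fun, sizeof(fun));

    // compute step: recover the callback exactly as the MAP_UNARY case does
    unary_op_f32_t recovered;
    memcpy(&recovered, op_params, sizeof(recovered));

    float src[3] = {1.0f, -2.0f, 3.0f};
    float dst[3];
    recovered(3, dst, src);
    printf("%g %g %g\n", dst[0], dst[1], dst[2]);
    return 0;
}
```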
@@ -15211,12 +14920,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15211
14920
|
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
|
15212
14921
|
}
|
15213
14922
|
if (src1->grad) {
|
15214
|
-
|
15215
|
-
|
15216
|
-
const size_t
|
15217
|
-
const size_t
|
15218
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15219
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
14923
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
14924
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
14925
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
14926
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15220
14927
|
|
15221
14928
|
struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
|
15222
14929
|
tensor->grad,
|
@@ -15365,73 +15072,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15365
15072
|
inplace);
|
15366
15073
|
}
|
15367
15074
|
} break;
|
15368
|
-
case GGML_OP_ABS:
|
15369
|
-
{
|
15370
|
-
if (src0->grad) {
|
15371
|
-
src0->grad =
|
15372
|
-
ggml_add_impl(ctx,
|
15373
|
-
src0->grad,
|
15374
|
-
ggml_mul(ctx,
|
15375
|
-
ggml_sgn(ctx, src0),
|
15376
|
-
tensor->grad),
|
15377
|
-
inplace);
|
15378
|
-
}
|
15379
|
-
} break;
|
15380
|
-
case GGML_OP_SGN:
|
15381
|
-
{
|
15382
|
-
if (src0->grad) {
|
15383
|
-
// noop
|
15384
|
-
}
|
15385
|
-
} break;
|
15386
|
-
case GGML_OP_NEG:
|
15387
|
-
{
|
15388
|
-
if (src0->grad) {
|
15389
|
-
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15390
|
-
}
|
15391
|
-
} break;
|
15392
|
-
case GGML_OP_STEP:
|
15393
|
-
{
|
15394
|
-
if (src0->grad) {
|
15395
|
-
// noop
|
15396
|
-
}
|
15397
|
-
} break;
|
15398
|
-
case GGML_OP_TANH:
|
15399
|
-
{
|
15400
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15401
|
-
} break;
|
15402
|
-
case GGML_OP_ELU:
|
15403
|
-
{
|
15404
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15405
|
-
} break;
|
15406
|
-
case GGML_OP_RELU:
|
15407
|
-
{
|
15408
|
-
if (src0->grad) {
|
15409
|
-
src0->grad = ggml_sub_impl(ctx,
|
15410
|
-
src0->grad,
|
15411
|
-
ggml_mul(ctx,
|
15412
|
-
ggml_step(ctx, src0),
|
15413
|
-
tensor->grad),
|
15414
|
-
inplace);
|
15415
|
-
}
|
15416
|
-
} break;
|
15417
|
-
case GGML_OP_GELU:
|
15418
|
-
{
|
15419
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15420
|
-
} break;
|
15421
|
-
case GGML_OP_GELU_QUICK:
|
15422
|
-
{
|
15423
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15424
|
-
} break;
|
15425
|
-
case GGML_OP_SILU:
|
15426
|
-
{
|
15427
|
-
// necessary for llama
|
15428
|
-
if (src0->grad) {
|
15429
|
-
src0->grad = ggml_add_impl(ctx,
|
15430
|
-
src0->grad,
|
15431
|
-
ggml_silu_back(ctx, src0, tensor->grad),
|
15432
|
-
inplace);
|
15433
|
-
}
|
15434
|
-
} break;
|
15435
15075
|
case GGML_OP_SILU_BACK:
|
15436
15076
|
{
|
15437
15077
|
GGML_ASSERT(false); // TODO: not implemented
|
@@ -15524,12 +15164,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15524
15164
|
} break;
|
15525
15165
|
case GGML_OP_SET:
|
15526
15166
|
{
|
15527
|
-
|
15528
|
-
|
15529
|
-
const size_t
|
15530
|
-
const size_t
|
15531
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15532
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
15167
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
15168
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
15169
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
15170
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15533
15171
|
|
15534
15172
|
struct ggml_tensor * tensor_grad_view = NULL;
|
15535
15173
|
|
@@ -15606,8 +15244,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15606
15244
|
if (src0->grad) {
|
15607
15245
|
size_t offset;
|
15608
15246
|
|
15609
|
-
|
15610
|
-
memcpy(&offset, tensor->src[2]->data, sizeof(offset));
|
15247
|
+
memcpy(&offset, tensor->op_params, sizeof(offset));
|
15611
15248
|
|
15612
15249
|
size_t nb1 = tensor->nb[1];
|
15613
15250
|
size_t nb2 = tensor->nb[2];
|
@@ -15634,7 +15271,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15634
15271
|
{
|
15635
15272
|
// necessary for llama
|
15636
15273
|
if (src0->grad) {
|
15637
|
-
int32_t * axes = (int32_t *) tensor->
|
15274
|
+
int32_t * axes = (int32_t *) tensor->op_params;
|
15638
15275
|
int axis0 = axes[0] & 0x3;
|
15639
15276
|
int axis1 = axes[1] & 0x3;
|
15640
15277
|
int axis2 = axes[2] & 0x3;
|
@@ -15690,33 +15327,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15690
15327
|
{
|
15691
15328
|
// necessary for llama
|
15692
15329
|
if (src0->grad) {
|
15693
|
-
|
15694
|
-
assert(ggml_nelements(src1) == 2);
|
15695
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15330
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15696
15331
|
src0->grad =
|
15697
15332
|
ggml_add_impl(ctx, src0->grad,
|
15698
15333
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15699
15334
|
inplace);
|
15700
15335
|
}
|
15701
|
-
if (src1->grad) {
|
15702
|
-
// noop
|
15703
|
-
}
|
15704
15336
|
} break;
|
15705
15337
|
case GGML_OP_DIAG_MASK_ZERO:
|
15706
15338
|
{
|
15707
15339
|
// necessary for llama
|
15708
15340
|
if (src0->grad) {
|
15709
|
-
|
15710
|
-
assert(ggml_nelements(src1) == 2);
|
15711
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15341
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15712
15342
|
src0->grad =
|
15713
15343
|
ggml_add_impl(ctx, src0->grad,
|
15714
15344
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15715
15345
|
inplace);
|
15716
15346
|
}
|
15717
|
-
if (src1->grad) {
|
15718
|
-
// noop
|
15719
|
-
}
|
15720
15347
|
} break;
|
15721
15348
|
case GGML_OP_SOFT_MAX:
|
15722
15349
|
{
|
@@ -15737,12 +15364,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15737
15364
|
{
|
15738
15365
|
// necessary for llama
|
15739
15366
|
if (src0->grad) {
|
15740
|
-
|
15741
|
-
|
15742
|
-
const int
|
15743
|
-
const int
|
15744
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15745
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15367
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15368
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15369
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15370
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15746
15371
|
src0->grad = ggml_add_impl(ctx,
|
15747
15372
|
src0->grad,
|
15748
15373
|
ggml_rope_back(ctx,
|
@@ -15753,19 +15378,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15753
15378
|
n_ctx),
|
15754
15379
|
inplace);
|
15755
15380
|
}
|
15756
|
-
if (src1->grad) {
|
15757
|
-
// noop
|
15758
|
-
}
|
15759
15381
|
} break;
|
15760
15382
|
case GGML_OP_ROPE_BACK:
|
15761
15383
|
{
|
15762
15384
|
if (src0->grad) {
|
15763
|
-
|
15764
|
-
|
15765
|
-
const int
|
15766
|
-
const int
|
15767
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15768
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15385
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15386
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15387
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15388
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15769
15389
|
src0->grad = ggml_add_impl(ctx,
|
15770
15390
|
src0->grad,
|
15771
15391
|
ggml_rope(ctx,
|
@@ -15776,9 +15396,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15776
15396
|
n_ctx),
|
15777
15397
|
inplace);
|
15778
15398
|
}
|
15779
|
-
if (src1->grad) {
|
15780
|
-
// noop
|
15781
|
-
}
|
15782
15399
|
} break;
|
15783
15400
|
case GGML_OP_ALIBI:
|
15784
15401
|
{
|
@@ -15808,7 +15425,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15808
15425
|
{
|
15809
15426
|
struct ggml_tensor * flash_grad = NULL;
|
15810
15427
|
if (src0->grad || src1->grad || tensor->src[2]->grad) {
|
15811
|
-
int32_t t =
|
15428
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15812
15429
|
GGML_ASSERT(t == 0 || t == 1);
|
15813
15430
|
bool masked = t != 0;
|
15814
15431
|
flash_grad =
|
@@ -15971,6 +15588,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15971
15588
|
} break;
|
15972
15589
|
case GGML_OP_WIN_PART:
|
15973
15590
|
case GGML_OP_WIN_UNPART:
|
15591
|
+
case GGML_OP_UNARY:
|
15592
|
+
{
|
15593
|
+
switch (ggml_get_unary_op(tensor)) {
|
15594
|
+
case GGML_UNARY_OP_ABS:
|
15595
|
+
{
|
15596
|
+
if (src0->grad) {
|
15597
|
+
src0->grad =
|
15598
|
+
ggml_add_impl(ctx,
|
15599
|
+
src0->grad,
|
15600
|
+
ggml_mul(ctx,
|
15601
|
+
ggml_sgn(ctx, src0),
|
15602
|
+
tensor->grad),
|
15603
|
+
inplace);
|
15604
|
+
}
|
15605
|
+
} break;
|
15606
|
+
case GGML_UNARY_OP_SGN:
|
15607
|
+
{
|
15608
|
+
if (src0->grad) {
|
15609
|
+
// noop
|
15610
|
+
}
|
15611
|
+
} break;
|
15612
|
+
case GGML_UNARY_OP_NEG:
|
15613
|
+
{
|
15614
|
+
if (src0->grad) {
|
15615
|
+
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15616
|
+
}
|
15617
|
+
} break;
|
15618
|
+
case GGML_UNARY_OP_STEP:
|
15619
|
+
{
|
15620
|
+
if (src0->grad) {
|
15621
|
+
// noop
|
15622
|
+
}
|
15623
|
+
} break;
|
15624
|
+
case GGML_UNARY_OP_TANH:
|
15625
|
+
{
|
15626
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15627
|
+
} break;
|
15628
|
+
case GGML_UNARY_OP_ELU:
|
15629
|
+
{
|
15630
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15631
|
+
} break;
|
15632
|
+
case GGML_UNARY_OP_RELU:
|
15633
|
+
{
|
15634
|
+
if (src0->grad) {
|
15635
|
+
src0->grad = ggml_add_impl(ctx,
|
15636
|
+
src0->grad,
|
15637
|
+
ggml_mul(ctx,
|
15638
|
+
ggml_step(ctx, src0),
|
15639
|
+
tensor->grad),
|
15640
|
+
inplace);
|
15641
|
+
}
|
15642
|
+
} break;
|
15643
|
+
case GGML_UNARY_OP_GELU:
|
15644
|
+
{
|
15645
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15646
|
+
} break;
|
15647
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15648
|
+
{
|
15649
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15650
|
+
} break;
|
15651
|
+
case GGML_UNARY_OP_SILU:
|
15652
|
+
{
|
15653
|
+
// necessary for llama
|
15654
|
+
if (src0->grad) {
|
15655
|
+
src0->grad = ggml_add_impl(ctx,
|
15656
|
+
src0->grad,
|
15657
|
+
ggml_silu_back(ctx, src0, tensor->grad),
|
15658
|
+
inplace);
|
15659
|
+
}
|
15660
|
+
} break;
|
15661
|
+
default:
|
15662
|
+
GGML_ASSERT(false);
|
15663
|
+
}
|
15664
|
+
} break;
|
15974
15665
|
case GGML_OP_MAP_UNARY:
|
15975
15666
|
case GGML_OP_MAP_BINARY:
|
15976
15667
|
case GGML_OP_MAP_CUSTOM1:
|
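The backward pass mirrors the forward change: the former per-activation ops are gone, and a single GGML_OP_UNARY case switches on `ggml_get_unary_op(tensor)`. A short usage sketch, assuming the ggml.h bundled with this release exposes `ggml_relu` and `ggml_get_unary_op` as the hunk suggests (build an activation node and observe that it is a generic unary op tagged with its sub-op):

```c
#include <assert.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * y = ggml_relu(ctx, x);

    // the node itself is the generic unary op ...
    assert(y->op == GGML_OP_UNARY);
    // ... and the concrete activation lives in op_params
    assert(ggml_get_unary_op(y) == GGML_UNARY_OP_RELU);

    ggml_free(ctx);
    return 0;
}
```

Collapsing ABS/SGN/NEG/.../SILU into one op keeps the ggml_op enum and the dispatch switches from growing with every new activation.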
@@ -16006,6 +15697,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
16006
15697
|
}
|
16007
15698
|
}
|
16008
15699
|
|
15700
|
+
static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
|
15701
|
+
|
15702
|
+
static size_t hash(void * p) {
|
15703
|
+
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
15704
|
+
}
|
15705
|
+
|
15706
|
+
static bool hash_insert(void * hash_table[], void * p) {
|
15707
|
+
size_t h = hash(p);
|
15708
|
+
|
15709
|
+
// linear probing
|
15710
|
+
size_t i = h;
|
15711
|
+
while (hash_table[i] != NULL && hash_table[i] != p) {
|
15712
|
+
i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
|
15713
|
+
if (i == h) {
|
15714
|
+
// hash table is full
|
15715
|
+
GGML_ASSERT(false);
|
15716
|
+
}
|
15717
|
+
}
|
15718
|
+
|
15719
|
+
if (hash_table[i] == p) {
|
15720
|
+
return true;
|
15721
|
+
}
|
15722
|
+
|
15723
|
+
// insert
|
15724
|
+
hash_table[i] = p;
|
15725
|
+
return false;
|
15726
|
+
}
|
15727
|
+
|
16009
15728
|
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
16010
15729
|
if (node->grad == NULL) {
|
16011
15730
|
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
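Previously `ggml_visit_parents` scanned `nodes[]` and `leafs[]` linearly for every candidate node; the `hash_insert` added above turns the visited check into an open-addressing lookup with linear probing. A standalone sketch of the same insert-or-already-present behaviour (the table size and names are illustrative, not the ggml constants):

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define TABLE_SIZE 17 // small prime, stands in for GGML_GRAPH_HASHTABLE_SIZE

static size_t hash_ptr(void * p) {
    return (size_t) p % TABLE_SIZE;
}

// returns 1 if p was already present, 0 if it was inserted just now
static int hash_insert(void * table[], void * p) {
    size_t h = hash_ptr(p);
    size_t i = h;
    while (table[i] != NULL && table[i] != p) {
        i = (i + 1) % TABLE_SIZE; // linear probing
        assert(i != h);           // wrapped around: table is full
    }
    if (table[i] == p) {
        return 1;
    }
    table[i] = p;
    return 0;
}

int main(void) {
    void * table[TABLE_SIZE] = {0};
    int a, b;

    printf("%d\n", hash_insert(table, &a)); // 0: newly inserted
    printf("%d\n", hash_insert(table, &b)); // 0: newly inserted
    printf("%d\n", hash_insert(table, &a)); // 1: already visited
    return 0;
}
```

For large graphs this replaces an O(n) membership scan per visit with an expected O(1) probe, which is the point of the static_assert tying the table size to GGML_MAX_NODES.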
@@ -16016,16 +15735,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
16016
15735
|
}
|
16017
15736
|
|
16018
15737
|
// check if already visited
|
16019
|
-
|
16020
|
-
|
16021
|
-
return;
|
16022
|
-
}
|
16023
|
-
}
|
16024
|
-
|
16025
|
-
for (int i = 0; i < cgraph->n_leafs; i++) {
|
16026
|
-
if (cgraph->leafs[i] == node) {
|
16027
|
-
return;
|
16028
|
-
}
|
15738
|
+
if (hash_insert(cgraph->visited_hash_table, node)) {
|
15739
|
+
return;
|
16029
15740
|
}
|
16030
15741
|
|
16031
15742
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
@@ -16088,6 +15799,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
|
16088
15799
|
/*.nodes =*/ { NULL },
|
16089
15800
|
/*.grads =*/ { NULL },
|
16090
15801
|
/*.leafs =*/ { NULL },
|
15802
|
+
/*.hash_table =*/ { NULL },
|
16091
15803
|
/*.perf_runs =*/ 0,
|
16092
15804
|
/*.perf_cycles =*/ 0,
|
16093
15805
|
/*.perf_time_us =*/ 0,
|
@@ -16129,13 +15841,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
|
|
16129
15841
|
|
16130
15842
|
if (node->is_param) {
|
16131
15843
|
GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
|
16132
|
-
|
15844
|
+
ggml_build_forward_expand(&result, node->grad);
|
16133
15845
|
}
|
16134
15846
|
}
|
16135
15847
|
|
16136
15848
|
return result;
|
16137
15849
|
}
|
16138
15850
|
|
15851
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15852
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
|
15853
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15854
|
+
|
15855
|
+
*cgraph = (struct ggml_cgraph) {
|
15856
|
+
/*.n_nodes =*/ 0,
|
15857
|
+
/*.n_leafs =*/ 0,
|
15858
|
+
/*.nodes =*/ { NULL },
|
15859
|
+
/*.grads =*/ { NULL },
|
15860
|
+
/*.leafs =*/ { NULL },
|
15861
|
+
/*.hash_table =*/ { NULL },
|
15862
|
+
/*.perf_runs =*/ 0,
|
15863
|
+
/*.perf_cycles =*/ 0,
|
15864
|
+
/*.perf_time_us =*/ 0,
|
15865
|
+
};
|
15866
|
+
|
15867
|
+
return cgraph;
|
15868
|
+
}
|
15869
|
+
|
15870
|
+
struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
|
15871
|
+
struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
|
15872
|
+
ggml_build_forward_impl(cgraph, tensor, false);
|
15873
|
+
return cgraph;
|
15874
|
+
}
|
15875
|
+
|
15876
|
+
size_t ggml_graph_overhead(void) {
|
15877
|
+
return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
|
15878
|
+
}
|
15879
|
+
|
16139
15880
|
//
|
16140
15881
|
// thread data
|
16141
15882
|
//
|
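The new `ggml_new_graph` / `ggml_build_forward_ctx` entry points allocate the cgraph inside the context's memory buffer instead of on the caller's stack, so the context has to be sized with `ggml_graph_overhead()` in mind. A usage sketch, assuming the API surface shown in this hunk and in the graph-compute hunk further down:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // reserve room for a few small tensors plus the graph object itself
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_graph_overhead() + 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    // the graph lives in the context buffer; the work buffer is allocated
    // there as well by ggml_graph_compute_with_ctx
    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, c);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/1);

    printf("c[0] = %f\n", ggml_get_f32_1d(c, 0)); // expected 3.0

    ggml_free(ctx);
    return 0;
}
```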
@@ -16201,7 +15942,7 @@ typedef pthread_t ggml_thread_t;
|
|
16201
15942
|
|
16202
15943
|
// Android's libc implementation "bionic" does not support setting affinity
|
16203
15944
|
#if defined(__linux__) && !defined(__BIONIC__)
|
16204
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) {
|
15945
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) {
|
16205
15946
|
if (!ggml_is_numa()) {
|
16206
15947
|
return;
|
16207
15948
|
}
|
@@ -16226,7 +15967,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
|
|
16226
15967
|
CPU_FREE(cpus);
|
16227
15968
|
}
|
16228
15969
|
|
16229
|
-
void clear_numa_thread_affinity(void) {
|
15970
|
+
static void clear_numa_thread_affinity(void) {
|
16230
15971
|
if (!ggml_is_numa()) {
|
16231
15972
|
return;
|
16232
15973
|
}
|
@@ -16250,8 +15991,8 @@ void clear_numa_thread_affinity(void) {
|
|
16250
15991
|
#else
|
16251
15992
|
// TODO: Windows etc.
|
16252
15993
|
// (the linux implementation may also work on BSD, someone should test)
|
16253
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
16254
|
-
void clear_numa_thread_affinity(void) {}
|
15994
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15995
|
+
static void clear_numa_thread_affinity(void) {}
|
16255
15996
|
#endif
|
16256
15997
|
|
16257
15998
|
struct ggml_compute_state_shared {
|
@@ -16463,21 +16204,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16463
16204
|
case GGML_OP_ARGMAX:
|
16464
16205
|
case GGML_OP_REPEAT:
|
16465
16206
|
case GGML_OP_REPEAT_BACK:
|
16466
|
-
|
16467
|
-
case GGML_OP_SGN:
|
16468
|
-
case GGML_OP_NEG:
|
16469
|
-
case GGML_OP_STEP:
|
16470
|
-
case GGML_OP_TANH:
|
16471
|
-
case GGML_OP_ELU:
|
16472
|
-
case GGML_OP_RELU:
|
16473
|
-
{
|
16207
|
+
{
|
16474
16208
|
n_tasks = 1;
|
16475
16209
|
} break;
|
16476
|
-
|
16477
|
-
case
|
16478
|
-
|
16479
|
-
|
16210
|
+
|
16211
|
+
case GGML_OP_UNARY:
|
16212
|
+
{
|
16213
|
+
switch (ggml_get_unary_op(node)) {
|
16214
|
+
case GGML_UNARY_OP_ABS:
|
16215
|
+
case GGML_UNARY_OP_SGN:
|
16216
|
+
case GGML_UNARY_OP_NEG:
|
16217
|
+
case GGML_UNARY_OP_STEP:
|
16218
|
+
case GGML_UNARY_OP_TANH:
|
16219
|
+
case GGML_UNARY_OP_ELU:
|
16220
|
+
case GGML_UNARY_OP_RELU:
|
16221
|
+
{
|
16222
|
+
n_tasks = 1;
|
16223
|
+
} break;
|
16224
|
+
|
16225
|
+
case GGML_UNARY_OP_GELU:
|
16226
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
16227
|
+
case GGML_UNARY_OP_SILU:
|
16228
|
+
{
|
16229
|
+
n_tasks = n_threads;
|
16230
|
+
} break;
|
16231
|
+
}
|
16232
|
+
} break;
|
16480
16233
|
case GGML_OP_SILU_BACK:
|
16234
|
+
case GGML_OP_MUL:
|
16481
16235
|
case GGML_OP_NORM:
|
16482
16236
|
case GGML_OP_RMS_NORM:
|
16483
16237
|
case GGML_OP_RMS_NORM_BACK:
|
@@ -16542,10 +16296,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16542
16296
|
case GGML_OP_GET_ROWS:
|
16543
16297
|
case GGML_OP_GET_ROWS_BACK:
|
16544
16298
|
case GGML_OP_DIAG:
|
16545
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16546
16299
|
{
|
16547
16300
|
n_tasks = 1;
|
16548
16301
|
} break;
|
16302
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
16549
16303
|
case GGML_OP_DIAG_MASK_INF:
|
16550
16304
|
case GGML_OP_SOFT_MAX:
|
16551
16305
|
case GGML_OP_SOFT_MAX_BACK:
|
@@ -16838,10 +16592,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
16838
16592
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16839
16593
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16840
16594
|
|
16841
|
-
struct
|
16842
|
-
GGML_ASSERT(buf);
|
16595
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
|
16843
16596
|
|
16844
|
-
cplan.work_data =
|
16597
|
+
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
16845
16598
|
|
16846
16599
|
ggml_graph_compute(cgraph, &cplan);
|
16847
16600
|
}
|
@@ -16992,7 +16745,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16992
16745
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16993
16746
|
}
|
16994
16747
|
|
16995
|
-
fwrite(tensor->name,
|
16748
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16749
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
16996
16750
|
|
16997
16751
|
// dump the data
|
16998
16752
|
// TODO: pad this to 32 byte boundary
|
@@ -17025,7 +16779,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
17025
16779
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
17026
16780
|
}
|
17027
16781
|
|
17028
|
-
fwrite(tensor->name,
|
16782
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16783
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
17029
16784
|
|
17030
16785
|
// output the op arguments
|
17031
16786
|
{
|
@@ -17206,7 +16961,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17206
16961
|
|
17207
16962
|
tensor->op = (enum ggml_op) op;
|
17208
16963
|
|
17209
|
-
memcpy(tensor->name,
|
16964
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
16965
|
+
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
|
17210
16966
|
|
17211
16967
|
tensor->data = (void *) ptr;
|
17212
16968
|
|
@@ -17251,7 +17007,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17251
17007
|
nb[j] = nb_cur;
|
17252
17008
|
}
|
17253
17009
|
|
17254
|
-
const char * ptr_name
|
17010
|
+
const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
|
17011
|
+
const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;
|
17255
17012
|
|
17256
17013
|
const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);
|
17257
17014
|
|
@@ -17288,8 +17045,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17288
17045
|
{
|
17289
17046
|
tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);
|
17290
17047
|
|
17291
|
-
|
17292
|
-
memcpy(&offs,
|
17048
|
+
size_t offs;
|
17049
|
+
memcpy(&offs, ptr_op_params, sizeof(offs));
|
17293
17050
|
|
17294
17051
|
tensor->data = ((char *) tensor->data) + offs;
|
17295
17052
|
} break;
|
@@ -17309,7 +17066,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17309
17066
|
} break;
|
17310
17067
|
}
|
17311
17068
|
|
17312
|
-
memcpy(tensor->name,
|
17069
|
+
memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
|
17070
|
+
memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);
|
17313
17071
|
|
17314
17072
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
17315
17073
|
tensor->nb[j] = nb[j];
|
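Graph export and import now round-trip two fixed-width byte fields per tensor, the name and the raw op_params, and the reader simply advances a cursor past each field. A minimal sketch of that fixed-width layout; the sizes and helper names below are illustrative stand-ins, not the on-disk format:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAX_NAME      48 // stands in for GGML_MAX_NAME
#define MAX_OP_PARAMS 32 // stands in for GGML_MAX_OP_PARAMS

int main(void) {
    // writer side: emit the two fixed-width fields back to back
    char buf[MAX_NAME + MAX_OP_PARAMS] = {0};
    char name[MAX_NAME] = "blk.0.attn_q.weight";
    char op_params[MAX_OP_PARAMS] = {0};

    int32_t offs = 128; // e.g. a view offset parameter
    memcpy(op_params, &offs, sizeof(offs));

    memcpy(buf, name, MAX_NAME);
    memcpy(buf + MAX_NAME, op_params, MAX_OP_PARAMS);

    // reader side: walk the buffer with a cursor, like the import loop above
    const char * ptr = buf;
    const char * ptr_name      = ptr; ptr += MAX_NAME;
    const char * ptr_op_params = ptr; ptr += MAX_OP_PARAMS;

    int32_t param0;
    memcpy(&param0, ptr_op_params, sizeof(param0));
    printf("name = %s, param0 = %d\n", ptr_name, (int) param0);
    return 0;
}
```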
@@ -17343,7 +17101,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17343
17101
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17344
17102
|
i,
|
17345
17103
|
node->ne[0], node->ne[1], node->ne[2],
|
17346
|
-
|
17104
|
+
ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17347
17105
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17348
17106
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17349
17107
|
(double) node->perf_time_us / 1000.0,
|
@@ -17357,7 +17115,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17357
17115
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
17358
17116
|
i,
|
17359
17117
|
node->ne[0], node->ne[1],
|
17360
|
-
|
17118
|
+
ggml_op_name(node->op));
|
17361
17119
|
}
|
17362
17120
|
|
17363
17121
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -17365,7 +17123,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17365
17123
|
continue;
|
17366
17124
|
}
|
17367
17125
|
|
17368
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
|
17126
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
|
17369
17127
|
}
|
17370
17128
|
|
17371
17129
|
GGML_PRINT("========================================\n");
|
@@ -17459,13 +17217,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17459
17217
|
}
|
17460
17218
|
|
17461
17219
|
if (node->n_dims == 2) {
|
17462
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
|
17220
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
17463
17221
|
} else {
|
17464
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
|
17222
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
17465
17223
|
}
|
17466
17224
|
|
17467
17225
|
if (node->grad) {
|
17468
|
-
fprintf(fp, " | <g>%s\"; ]\n",
|
17226
|
+
fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
|
17469
17227
|
} else {
|
17470
17228
|
fprintf(fp, "\"; ]\n");
|
17471
17229
|
}
|