llama_cpp 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +439 -9
- data/ext/llama_cpp/src/ggml-cuda.cu +759 -136
- data/ext/llama_cpp/src/ggml-metal.h +7 -0
- data/ext/llama_cpp/src/ggml-metal.m +250 -111
- data/ext/llama_cpp/src/ggml-metal.metal +614 -483
- data/ext/llama_cpp/src/ggml.c +793 -1032
- data/ext/llama_cpp/src/ggml.h +95 -18
- data/ext/llama_cpp/src/k_quants.c +327 -3
- data/ext/llama_cpp/src/k_quants.h +8 -0
- data/ext/llama_cpp/src/llama.cpp +626 -166
- data/ext/llama_cpp/src/llama.h +94 -10
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +1 -0
- data/sig/llama_cpp.rbs +36 -1
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -31,11 +31,17 @@
#include <unistd.h>
#endif

+// static_assert should be a #define, but if it's not,
+// fall back to the _Static_assert C11 keyword.
// if C99 - static_assert is noop
// ref: https://stackoverflow.com/a/53923785/4039976
#ifndef static_assert
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
+#define static_assert(cond, msg) _Static_assert(cond, msg)
+#else
#define static_assert(cond, msg) struct global_scope_noop_trick
#endif
+#endif

#if defined(_MSC_VER)
// disable "possible loss of data" to avoid hundreds of casts
@@ -112,10 +118,6 @@ typedef void * thread_ret_t;
#endif
#endif

-#ifdef __HAIKU__
-#define static_assert(cond, msg) _Static_assert(cond, msg)
-#endif
-
/*#define GGML_PERF*/
#define GGML_DEBUG 0
#define GGML_GELU_FP16
@@ -3438,7 +3440,9 @@ inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float

//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
-#if defined(
+#if defined(GGML_USE_ACCELERATE)
+vDSP_vsmul(y, 1, &v, y, 1, n);
+#elif defined(GGML_SIMD)
const int np = (n & ~(GGML_F32_STEP - 1));

GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
@@ -3601,7 +3605,7 @@ inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) {
#endif
}

-inline static void
+inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float * x) {
ggml_float sum = 0.0;
for (int i = 0; i < n; ++i) {
sum += (ggml_float)x[i];
@@ -3609,6 +3613,14 @@ inline static void ggml_vec_sum_ggf(const int n, ggml_float * s, const float * x
*s = sum;
}

+inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) {
+float sum = 0.0f;
+for (int i = 0; i < n; ++i) {
+sum += GGML_FP16_TO_FP32(x[i]);
+}
+*s = sum;
+}
+
inline static void ggml_vec_max_f32(const int n, float * s, const float * x) {
#ifndef GGML_USE_ACCELERATE
float max = -INFINITY;
@@ -3748,16 +3760,6 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"ARGMAX",
"REPEAT",
"REPEAT_BACK",
-"ABS",
-"SGN",
-"NEG",
-"STEP",
-"TANH",
-"ELU",
-"RELU",
-"GELU",
-"GELU_QUICK",
-"SILU",
"SILU_BACK",
"NORM",
"RMS_NORM",
@@ -3796,6 +3798,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"WIN_PART",
"WIN_UNPART",

+"UNARY",
+
"MAP_UNARY",
"MAP_BINARY",

@@ -3807,7 +3811,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};

-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");

static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -3828,16 +3832,6 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"argmax(x)",
"repeat(x)",
"repeat_back(x)",
-"abs(x)",
-"sgn(x)",
-"-x",
-"step(x)",
-"tanh(x)",
-"elu(x)",
-"relu(x)",
-"gelu(x)",
-"gelu_quick(x)",
-"silu(x)",
"silu_back(x)",
"norm(x)",
"rms_norm(x)",
@@ -3876,6 +3870,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"win_part(x)",
"win_unpart(x)",

+"unary(x)",
+
"f(x)",
"f(x,y)",

@@ -3887,7 +3883,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};

-static_assert(GGML_OP_COUNT ==
+static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");

static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");

@@ -4075,8 +4071,8 @@ bool ggml_is_numa(void) {
////////////////////////////////////////////////////////////////////////////////

void ggml_print_object(const struct ggml_object * obj) {
-GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-obj->offs, obj->size, (const void *) obj->next);
+GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+obj->type, obj->offs, obj->size, (const void *) obj->next);
}

void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4143,6 +4139,10 @@ const char * ggml_op_name(enum ggml_op op) {
return GGML_OP_NAME[op];
}

+const char * ggml_op_symbol(enum ggml_op op) {
+return GGML_OP_SYMBOL[op];
+}
+
size_t ggml_element_size(const struct ggml_tensor * tensor) {
return GGML_TYPE_SIZE[tensor->type];
}
@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
}

size_t ggml_tensor_overhead(void) {
-return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE
+return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
}

bool ggml_is_transposed(const struct ggml_tensor * tensor) {
@@ -4229,6 +4229,15 @@ bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}

+static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * tensor) {
+static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
+
+return
+tensor->nb[0] == GGML_TYPE_SIZE[tensor->type] &&
+tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
+tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
+}
+
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");

@@ -4374,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
return NULL;
}

-const size_t mem_size =
+const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);

*ctx = (struct ggml_context) {
/*.mem_size =*/ mem_size,
@@ -4410,8 +4419,8 @@ void ggml_free(struct ggml_context * ctx) {
if (&g_state.contexts[i].context == ctx) {
g_state.contexts[i].used = false;

-GGML_PRINT_DEBUG("%s: context %d
-__func__, i, ctx
+GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
+__func__, i, ggml_used_mem(ctx));

if (ctx->mem_buffer_owned) {
GGML_ALIGNED_FREE(ctx->mem_buffer);
@@ -4441,6 +4450,10 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
return result;
}

+bool ggml_get_no_alloc(struct ggml_context * ctx) {
+return ctx->no_alloc;
+}
+
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
ctx->no_alloc = no_alloc;
}
@@ -4459,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
struct ggml_object * obj = ctx->objects_begin;

while (obj != NULL) {
-
+if (obj->type == GGML_OBJECT_TENSOR) {
+struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);

-
+const size_t size = ggml_nbytes(tensor);

-
-
+if (max_size < size) {
+max_size = size;
+}
}

obj = obj->next;
@@ -4478,7 +4493,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
// this is an error prone process, but it is necessary to support inplace
// operators when using scratch buffers
// TODO: implement a better way
-void ggml_scratch_save(struct ggml_context * ctx) {
+static void ggml_scratch_save(struct ggml_context * ctx) {
// this is needed to allow opt tensors to store their data
// TODO: again, need to find a better way
ctx->no_alloc_save = ctx->no_alloc;
@@ -4488,7 +4503,7 @@ void ggml_scratch_save(struct ggml_context * ctx) {
ctx->scratch.data = NULL;
}

-void ggml_scratch_load(struct ggml_context * ctx) {
+static void ggml_scratch_load(struct ggml_context * ctx) {
ctx->no_alloc = ctx->no_alloc_save;

ctx->scratch = ctx->scratch_save;
@@ -4496,12 +4511,7 @@ void ggml_scratch_load(struct ggml_context * ctx) {

////////////////////////////////////////////////////////////////////////////////

-struct
-struct ggml_context * ctx,
-enum ggml_type type,
-int n_dims,
-const int64_t* ne,
-void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
// always insert objects at the end of the context's memory pool
struct ggml_object * obj_cur = ctx->objects_end;

@@ -4509,77 +4519,79 @@ struct ggml_tensor * ggml_new_tensor_impl(
const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
const size_t cur_end = cur_offs + cur_size;

-
-
-if (data == NULL && !ctx->no_alloc) {
-size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-for (int i = 1; i < n_dims; i++) {
-size_needed *= ne[i];
-}
-// align to GGML_MEM_ALIGN
-size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-}
+// align to GGML_MEM_ALIGN
+size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);

char * const mem_buffer = ctx->mem_buffer;
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);

-if (
-
+if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+__func__, cur_end + size_needed, ctx->mem_size);
+assert(false);
+return NULL;
+}
+
+*obj_new = (struct ggml_object) {
+.offs = cur_end + GGML_OBJECT_SIZE,
+.size = size_needed,
+.next = NULL,
+.type = type,
+};

-
-GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-__func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
-assert(false);
-return NULL;
-}
+ggml_assert_aligned(mem_buffer + obj_new->offs);

-
-
-.size = size_needed,
-.next = NULL,
-};
+if (obj_cur != NULL) {
+obj_cur->next = obj_new;
} else {
-
-
-
-
-
+// this is the first object in this context
+ctx->objects_begin = obj_new;
+}
+
+ctx->objects_end = obj_new;
+
+//printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+
+return obj_new;
+}
+
+static struct ggml_tensor * ggml_new_tensor_impl(
+struct ggml_context * ctx,
+enum ggml_type type,
+int n_dims,
+const int64_t* ne,
+void* data) {
+
+size_t data_size = 0;
+
+if (data == NULL && !ctx->no_alloc) {
+data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+for (int i = 1; i < n_dims; i++) {
+data_size *= ne[i];
}
+}

-
-
-
+if (ctx->scratch.data != NULL && data == NULL) {
+// allocate tensor data in the scratch buffer
+if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+__func__, ctx->scratch.offs + data_size, ctx->scratch.size);
assert(false);
return NULL;
}

data = (char * const) ctx->scratch.data + ctx->scratch.offs;

-
-.offs = cur_end + GGML_OBJECT_SIZE,
-.size = GGML_TENSOR_SIZE,
-.next = NULL,
-};
-
-//printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
-
-ctx->scratch.offs += size_needed;
-}
+ctx->scratch.offs += data_size;

-
-obj_cur->next = obj_new;
-} else {
-// this is the first object in this context
-ctx->objects_begin = obj_new;
+data_size = 0;
}

-
-
-//printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
+struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);

-
+// TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here

-
+struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);

*result = (struct ggml_tensor) {
/*.type =*/ type,
@@ -4588,6 +4600,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
/*.ne =*/ { 1, 1, 1, 1 },
/*.nb =*/ { 0, 0, 0, 0 },
/*.op =*/ GGML_OP_NONE,
+/*.op_params =*/ {0},
/*.is_param =*/ false,
/*.grad =*/ NULL,
/*.src =*/ { NULL },
@@ -4618,6 +4631,21 @@ struct ggml_tensor * ggml_new_tensor_impl(
return result;
}

+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+assert(params_size <= GGML_MAX_OP_PARAMS);
+memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+((int32_t *)(tensor->op_params))[i] = value;
+}
+
struct ggml_tensor * ggml_new_tensor(
struct ggml_context * ctx,
enum ggml_type type,
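Note: the ggml_set_op_params / ggml_get_op_params_i32 helpers added above replace the old pattern of stashing operator arguments in small auxiliary GGML_TYPE_I32 tensors hung off src[1]/src[2]. A minimal sketch of the pattern, as it is used inside ggml.c (the n_past parameter here is only an illustrative example):

    /* inside an op constructor: parameters travel with the node itself */
    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));

    /* later, in the compute code for that op, they are read back: */
    const int32_t n_past_saved = ggml_get_op_params_i32(tensor, 0);

This is the same pattern the hunks further down apply to ggml_acc, ggml_set, the ggml_view_* functions, ggml_rope, ggml_alibi, ggml_clamp and the convolution ops, all of which previously allocated a parameter tensor via ggml_new_tensor_1d.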
@@ -4949,6 +4977,11 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
return (float *)(tensor->data);
}

+enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
+GGML_ASSERT(tensor->op == GGML_OP_UNARY);
+return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
+}
+
const char * ggml_get_name(const struct ggml_tensor * tensor) {
return tensor->name;
}
@@ -4987,9 +5020,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
char * const mem_buffer = ctx->mem_buffer;

while (obj != NULL) {
-
-
-
+if (obj->type == GGML_OBJECT_TENSOR) {
+struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+if (strcmp(cur->name, name) == 0) {
+return cur;
+}
}

obj = obj->next;
@@ -5002,7 +5037,7 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam

// ggml_dup

-struct ggml_tensor * ggml_dup_impl(
+static struct ggml_tensor * ggml_dup_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -5017,7 +5052,6 @@ struct ggml_tensor * ggml_dup_impl(
result->op = GGML_OP_DUP;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5036,7 +5070,7 @@ struct ggml_tensor * ggml_dup_inplace(

// ggml_add

-struct ggml_tensor * ggml_add_impl(
+static struct ggml_tensor * ggml_add_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5079,7 +5113,7 @@ struct ggml_tensor * ggml_add_inplace(

// ggml_add1

-struct ggml_tensor * ggml_add1_impl(
+static struct ggml_tensor * ggml_add1_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5119,7 +5153,7 @@ struct ggml_tensor * ggml_add1_inplace(

// ggml_acc

-struct ggml_tensor * ggml_acc_impl(
+static struct ggml_tensor * ggml_acc_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5141,23 +5175,13 @@ struct ggml_tensor * ggml_acc_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-
-
-struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-((int32_t *) c->data)[0] = nb1;
-((int32_t *) c->data)[1] = nb2;
-((int32_t *) c->data)[2] = nb3;
-((int32_t *) c->data)[3] = offset;
-((int32_t *) c->data)[4] = inplace ? 1 : 0;
-
-ggml_scratch_load(ctx);
+int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_ACC;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
-result->src[2] = c;

return result;
}
@@ -5186,7 +5210,7 @@ struct ggml_tensor * ggml_acc_inplace(

// ggml_sub

-struct ggml_tensor * ggml_sub_impl(
+static struct ggml_tensor * ggml_sub_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5225,7 +5249,7 @@ struct ggml_tensor * ggml_sub_inplace(

// ggml_mul

-struct ggml_tensor * ggml_mul_impl(
+static struct ggml_tensor * ggml_mul_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5272,7 +5296,7 @@ struct ggml_tensor * ggml_mul_inplace(

// ggml_div

-struct ggml_tensor * ggml_div_impl(
+static struct ggml_tensor * ggml_div_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -5315,7 +5339,7 @@ struct ggml_tensor * ggml_div_inplace(

// ggml_sqr

-struct ggml_tensor * ggml_sqr_impl(
+static struct ggml_tensor * ggml_sqr_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -5330,7 +5354,6 @@ struct ggml_tensor * ggml_sqr_impl(
result->op = GGML_OP_SQR;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5349,7 +5372,7 @@ struct ggml_tensor * ggml_sqr_inplace(

// ggml_sqrt

-struct ggml_tensor * ggml_sqrt_impl(
+static struct ggml_tensor * ggml_sqrt_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -5364,7 +5387,6 @@ struct ggml_tensor * ggml_sqrt_impl(
result->op = GGML_OP_SQRT;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5384,7 +5406,7 @@ struct ggml_tensor * ggml_sqrt_inplace(

// ggml_log

-struct ggml_tensor * ggml_log_impl(
+static struct ggml_tensor * ggml_log_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -5399,7 +5421,6 @@ struct ggml_tensor * ggml_log_impl(
result->op = GGML_OP_LOG;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5432,7 +5453,6 @@ struct ggml_tensor * ggml_sum(
result->op = GGML_OP_SUM;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5459,7 +5479,6 @@ struct ggml_tensor * ggml_sum_rows(
result->op = GGML_OP_SUM_ROWS;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5482,7 +5501,6 @@ struct ggml_tensor * ggml_mean(
result->op = GGML_OP_MEAN;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5506,7 +5524,6 @@ struct ggml_tensor * ggml_argmax(
result->op = GGML_OP_ARGMAX;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -5569,343 +5586,142 @@ struct ggml_tensor * ggml_repeat_back(

// ggml_abs

-struct ggml_tensor * ggml_abs_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_ABS;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_abs(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
}

struct ggml_tensor * ggml_abs_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
}

-
// ggml_sgn

-struct ggml_tensor * ggml_sgn_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_SGN;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_sgn(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
}

struct ggml_tensor * ggml_sgn_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
}

// ggml_neg

-struct ggml_tensor * ggml_neg_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_NEG;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_neg(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
}

struct ggml_tensor * ggml_neg_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
}

// ggml_step

-struct ggml_tensor * ggml_step_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_STEP;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_step(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
}

struct ggml_tensor * ggml_step_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
}

// ggml_tanh

-struct ggml_tensor * ggml_tanh_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_TANH;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_tanh(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
}

struct ggml_tensor * ggml_tanh_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
}

// ggml_elu

-struct ggml_tensor * ggml_elu_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_ELU;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_elu(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
}

struct ggml_tensor * ggml_elu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
}

// ggml_relu

-struct ggml_tensor * ggml_relu_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_RELU;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_relu(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
}

struct ggml_tensor * ggml_relu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
}

// ggml_gelu

-struct ggml_tensor * ggml_gelu_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_GELU;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_gelu(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
}

struct ggml_tensor * ggml_gelu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
}

// ggml_gelu_quick

-struct ggml_tensor * ggml_gelu_quick_impl(
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_GELU_QUICK;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
struct ggml_tensor * ggml_gelu_quick(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
}

struct ggml_tensor * ggml_gelu_quick_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
}

// ggml_silu

-struct ggml_tensor *
-struct ggml_context * ctx,
-struct ggml_tensor * a,
-bool inplace) {
-bool is_node = false;
-
-if (!inplace && (a->grad)) {
-is_node = true;
-}
-
-struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
-
-result->op = GGML_OP_SILU;
-result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-result->src[0] = a;
-result->src[1] = NULL;
-
-return result;
-}
-
-struct ggml_tensor * ggml_silu(
+struct ggml_tensor * ggml_silu(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
}

struct ggml_tensor * ggml_silu_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a) {
-return
+return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
}

// ggml_silu_back
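All of the per-operator implementations removed above are now routed through a single GGML_OP_UNARY node: the public wrappers call ggml_unary / ggml_unary_inplace with a ggml_unary_op value, and ggml_get_unary_op (added earlier in this diff) reads that value back from the node's op params. A hedged usage sketch, assuming a valid ggml_context * ctx and an F32 tensor x created elsewhere:

    struct ggml_tensor * y = ggml_relu(ctx, x);   /* equivalent to ggml_unary(ctx, x, GGML_UNARY_OP_RELU) */

    /* the resulting graph node records the unary kind instead of a dedicated op:   */
    /*   y->op == GGML_OP_UNARY                                                      */
    /*   ggml_get_unary_op(y) == GGML_UNARY_OP_RELU                                  */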
@@ -5933,7 +5749,7 @@ struct ggml_tensor * ggml_silu_back(

// ggml_norm

-struct ggml_tensor * ggml_norm_impl(
+static struct ggml_tensor * ggml_norm_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -5946,10 +5762,11 @@ struct ggml_tensor * ggml_norm_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

+// TODO: maybe store epsilon here?
+
result->op = GGML_OP_NORM;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL; // TODO: maybe store epsilon here?

return result;
}
@@ -5966,9 +5783,10 @@ struct ggml_tensor * ggml_norm_inplace(
return ggml_norm_impl(ctx, a, true);
}

-struct ggml_tensor * ggml_rms_norm_impl(
+static struct ggml_tensor * ggml_rms_norm_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
+float eps,
bool inplace) {
bool is_node = false;

@@ -5978,24 +5796,27 @@ struct ggml_tensor * ggml_rms_norm_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

+ggml_set_op_params(result, &eps, sizeof(eps));
+
result->op = GGML_OP_RMS_NORM;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL; // TODO: maybe store epsilon here?

return result;
}

struct ggml_tensor * ggml_rms_norm(
struct ggml_context * ctx,
-struct ggml_tensor * a
-
+struct ggml_tensor * a,
+float eps) {
+return ggml_rms_norm_impl(ctx, a, eps, false);
}

struct ggml_tensor * ggml_rms_norm_inplace(
struct ggml_context * ctx,
-struct ggml_tensor * a
-
+struct ggml_tensor * a,
+float eps) {
+return ggml_rms_norm_impl(ctx, a, eps, true);
}

struct ggml_tensor * ggml_rms_norm_back(
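ggml_rms_norm and ggml_rms_norm_inplace now take the epsilon explicitly and store it in the node's op params instead of leaving it implicit in the kernel. A sketch of the updated call; the 1e-6f value is only an illustrative choice, not something mandated by this diff:

    /* before this update:  cur = ggml_rms_norm(ctx, cur);          */
    /* after this update the epsilon is an explicit argument:        */
    struct ggml_tensor * cur_norm = ggml_rms_norm(ctx, cur, 1e-6f);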
@@ -6074,7 +5895,7 @@ struct ggml_tensor * ggml_out_prod(

// ggml_scale

-struct ggml_tensor * ggml_scale_impl(
+static struct ggml_tensor * ggml_scale_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -6114,7 +5935,7 @@ struct ggml_tensor * ggml_scale_inplace(

// ggml_set

-struct ggml_tensor * ggml_set_impl(
+static struct ggml_tensor * ggml_set_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -6134,23 +5955,13 @@ struct ggml_tensor * ggml_set_impl(
// make a view of the destination
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-
-
-struct ggml_tensor * c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 5);
-
-(( int32_t * ) c->data)[0] = nb1;
-(( int32_t * ) c->data)[1] = nb2;
-(( int32_t * ) c->data)[2] = nb3;
-(( int32_t * ) c->data)[3] = offset;
-(( int32_t * ) c->data)[4] = inplace ? 1 : 0;
-
-ggml_scratch_load(ctx);
+int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
+ggml_set_op_params(result, params, sizeof(params));

result->op = GGML_OP_SET;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
result->src[1] = b;
-result->src[2] = c;

return result;
}
@@ -6214,7 +6025,7 @@ struct ggml_tensor * ggml_set_2d_inplace(

// ggml_cpy

-struct ggml_tensor * ggml_cpy_impl(
+static struct ggml_tensor * ggml_cpy_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -6259,7 +6070,7 @@ struct ggml_tensor * ggml_cpy_inplace(

// ggml_cont

-struct ggml_tensor * ggml_cont_impl(
+static struct ggml_tensor * ggml_cont_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -6275,7 +6086,6 @@ struct ggml_tensor * ggml_cont_impl(
result->op = GGML_OP_CONT;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6319,7 +6129,6 @@ struct ggml_tensor * ggml_reshape(
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6344,7 +6153,6 @@ struct ggml_tensor * ggml_reshape_1d(
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6370,7 +6178,6 @@ struct ggml_tensor * ggml_reshape_2d(
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6397,7 +6204,6 @@ struct ggml_tensor * ggml_reshape_3d(
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6426,7 +6232,6 @@ struct ggml_tensor * ggml_reshape_4d(
result->op = GGML_OP_RESHAPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6448,19 +6253,11 @@ struct ggml_tensor * ggml_view_1d(
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);

-
-
-struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-ggml_set_name(offs, "offset");
-memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-ggml_scratch_load(ctx);
+ggml_set_op_params(result, &offset, sizeof(offset));

result->op = GGML_OP_VIEW;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;
-result->src[2] = offs;

return result;
}
@@ -6486,13 +6283,7 @@ struct ggml_tensor * ggml_view_2d(
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);

-
-
-struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-ggml_set_name(offs, "offset");
-memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-ggml_scratch_load(ctx);
+ggml_set_op_params(result, &offset, sizeof(offset));

result->nb[1] = nb1;
result->nb[2] = result->nb[1]*ne1;
@@ -6501,8 +6292,6 @@ struct ggml_tensor * ggml_view_2d(
result->op = GGML_OP_VIEW;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;
-result->src[2] = offs;

return result;
}
@@ -6530,13 +6319,7 @@ struct ggml_tensor * ggml_view_3d(
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);

-
-
-struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-ggml_set_name(offs, "offset");
-memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-ggml_scratch_load(ctx);
+ggml_set_op_params(result, &offset, sizeof(offset));

result->nb[1] = nb1;
result->nb[2] = nb2;
@@ -6545,8 +6328,6 @@ struct ggml_tensor * ggml_view_3d(
result->op = GGML_OP_VIEW;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;
-result->src[2] = offs;

return result;
}
@@ -6576,13 +6357,7 @@ struct ggml_tensor * ggml_view_4d(
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
ggml_format_name(result, "%s (view)", a->name);

-
-
-struct ggml_tensor * offs = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-ggml_set_name(offs, "offset");
-memcpy(offs->data, &offset, 2*sizeof(int32_t));
-
-ggml_scratch_load(ctx);
+ggml_set_op_params(result, &offset, sizeof(offset));

result->nb[1] = nb1;
result->nb[2] = nb2;
@@ -6591,8 +6366,6 @@ struct ggml_tensor * ggml_view_4d(
result->op = GGML_OP_VIEW;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;
-result->src[2] = offs;

return result;
}
@@ -6653,22 +6426,9 @@ struct ggml_tensor * ggml_permute(
result->op = GGML_OP_PERMUTE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;
-
-if (is_node) {
-ggml_scratch_save(ctx);
-
-struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);
-
-((int32_t *) b->data)[0] = axis0;
-((int32_t *) b->data)[1] = axis1;
-((int32_t *) b->data)[2] = axis2;
-((int32_t *) b->data)[3] = axis3;

-
-
-result->src[2] = b;
-}
+int32_t params[] = { axis0, axis1, axis2, axis3 };
+ggml_set_op_params(result, &params, sizeof(params));

return result;
}
@@ -6696,7 +6456,6 @@ struct ggml_tensor * ggml_transpose(
result->op = GGML_OP_TRANSPOSE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6774,7 +6533,6 @@ struct ggml_tensor * ggml_diag(
result->op = GGML_OP_DIAG;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6782,7 +6540,7 @@ struct ggml_tensor * ggml_diag(

// ggml_diag_mask_inf

-struct ggml_tensor * ggml_diag_mask_inf_impl(
+static struct ggml_tensor * ggml_diag_mask_inf_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
@@ -6795,19 +6553,12 @@ struct ggml_tensor * ggml_diag_mask_inf_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-
-
-struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-
-((int32_t *) b->data)[0] = n_past;
-((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-ggml_scratch_load(ctx);
+int32_t params[] = { n_past, inplace ? 1 : 0 };
+ggml_set_op_params(result, &params, sizeof(params));

result->op = GGML_OP_DIAG_MASK_INF;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = b;

return result;
}
@@ -6829,7 +6580,7 @@ struct ggml_tensor * ggml_diag_mask_inf_inplace(

// ggml_diag_mask_zero

-struct ggml_tensor * ggml_diag_mask_zero_impl(
+static struct ggml_tensor * ggml_diag_mask_zero_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
@@ -6842,20 +6593,12 @@ struct ggml_tensor * ggml_diag_mask_zero_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-
-
-struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 2);
-ggml_set_name(b, "n_past, inplace");
-
-((int32_t *) b->data)[0] = n_past;
-((int32_t *) b->data)[1] = inplace ? 1 : 0;
-
-ggml_scratch_load(ctx);
+int32_t params[] = { n_past, inplace ? 1 : 0 };
+ggml_set_op_params(result, &params, sizeof(params));

result->op = GGML_OP_DIAG_MASK_ZERO;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = b;

return result;
}
@@ -6876,7 +6619,7 @@ struct ggml_tensor * ggml_diag_mask_zero_inplace(

// ggml_soft_max

-struct ggml_tensor * ggml_soft_max_impl(
+static struct ggml_tensor * ggml_soft_max_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
bool inplace) {
@@ -6891,7 +6634,6 @@ struct ggml_tensor * ggml_soft_max_impl(
result->op = GGML_OP_SOFT_MAX;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = NULL;

return result;
}
@@ -6911,7 +6653,7 @@ struct ggml_tensor * ggml_soft_max_inplace(

// ggml_soft_max_back

-struct ggml_tensor * ggml_soft_max_back_impl(
+static struct ggml_tensor * ggml_soft_max_back_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b,
@@ -6948,13 +6690,15 @@ struct ggml_tensor * ggml_soft_max_back_inplace(

// ggml_rope

-struct ggml_tensor * ggml_rope_impl(
+static struct ggml_tensor * ggml_rope_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
int n_dims,
int mode,
int n_ctx,
+float freq_base,
+float freq_scale,
bool inplace) {
GGML_ASSERT(n_past >= 0);
bool is_node = false;
@@ -6965,21 +6709,14 @@ struct ggml_tensor * ggml_rope_impl(

struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);

-
-
-
-
-((int32_t *) b->data)[0] = n_past;
-((int32_t *) b->data)[1] = n_dims;
-((int32_t *) b->data)[2] = mode;
-((int32_t *) b->data)[3] = n_ctx;
-
-ggml_scratch_load(ctx);
+int32_t params[6] = { n_past, n_dims, mode, n_ctx };
+memcpy(params + 4, &freq_base, sizeof(float));
+memcpy(params + 5, &freq_scale, sizeof(float));
+ggml_set_op_params(result, &params, sizeof(params));

result->op = GGML_OP_ROPE;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a;
-result->src[1] = b;

return result;
}
@@ -6991,7 +6728,7 @@ struct ggml_tensor * ggml_rope(
int n_dims,
int mode,
int n_ctx) {
-return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, false);
+return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, false);
}

struct ggml_tensor * ggml_rope_inplace(
@@ -7001,7 +6738,19 @@ struct ggml_tensor * ggml_rope_inplace(
int n_dims,
int mode,
int n_ctx) {
-return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, true);
+return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
+}
+
+struct ggml_tensor * ggml_rope_custom_inplace(
+struct ggml_context * ctx,
+struct ggml_tensor * a,
+int n_past,
+int n_dims,
+int mode,
+int n_ctx,
+float freq_base,
+float freq_scale) {
+return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, true);
}

// ggml_rope_back
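ggml_rope_impl now carries freq_base and freq_scale in its op params, and the new public ggml_rope_custom_inplace exposes them; the existing ggml_rope / ggml_rope_inplace wrappers keep the previous behaviour by passing 10000.0f and 1.0f. A usage sketch, assuming ctx, a query tensor q, and the usual RoPE arguments; the 0.5f scale below is only an example value:

    /* unchanged call sites keep the default frequency settings: */
    q = ggml_rope_inplace(ctx, q, n_past, n_rot, 0, n_ctx);

    /* custom settings go through the new entry point: */
    q = ggml_rope_custom_inplace(ctx, q, n_past, n_rot, 0, n_ctx, 10000.0f, 0.5f);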
@@ -7011,7 +6760,8 @@ struct ggml_tensor * ggml_rope_back(
|
|
7011
6760
|
struct ggml_tensor * a,
|
7012
6761
|
int n_past,
|
7013
6762
|
int n_dims,
|
7014
|
-
int mode
|
6763
|
+
int mode,
|
6764
|
+
int n_ctx) {
|
7015
6765
|
GGML_ASSERT(n_past >= 0);
|
7016
6766
|
GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
|
7017
6767
|
|
@@ -7023,21 +6773,12 @@ struct ggml_tensor * ggml_rope_back(
|
|
7023
6773
|
|
7024
6774
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7025
6775
|
|
7026
|
-
|
7027
|
-
|
7028
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7029
|
-
ggml_set_name(b, "n_past, n_dims, mode");
|
7030
|
-
|
7031
|
-
((int32_t *) b->data)[0] = n_past;
|
7032
|
-
((int32_t *) b->data)[1] = n_dims;
|
7033
|
-
((int32_t *) b->data)[2] = mode;
|
7034
|
-
|
7035
|
-
ggml_scratch_load(ctx);
|
6776
|
+
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
6777
|
+
ggml_set_op_params(result, ¶ms, sizeof(params));
|
7036
6778
|
|
7037
6779
|
result->op = GGML_OP_ROPE_BACK;
|
7038
6780
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7039
6781
|
result->src[0] = a;
|
7040
|
-
result->src[1] = b;
|
7041
6782
|
|
7042
6783
|
return result;
|
7043
6784
|
}
|
@@ -7062,21 +6803,13 @@ struct ggml_tensor * ggml_alibi(
|
|
7062
6803
|
//struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7063
6804
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7064
6805
|
|
7065
|
-
|
7066
|
-
|
7067
|
-
|
7068
|
-
|
7069
|
-
((int32_t *) b->data)[0] = n_past;
|
7070
|
-
((int32_t *) b->data)[1] = n_head;
|
7071
|
-
GGML_ASSERT(sizeof(float) == sizeof(int32_t));
|
7072
|
-
(((float *) b->data)[2]) = bias_max;
|
7073
|
-
|
7074
|
-
ggml_scratch_load(ctx);
|
6806
|
+
int32_t op_params[3] = { n_past, n_head };
|
6807
|
+
memcpy(op_params + 2, &bias_max, sizeof(float));
|
6808
|
+
ggml_set_op_params(result, &op_params, sizeof(op_params));
|
7075
6809
|
|
7076
6810
|
result->op = GGML_OP_ALIBI;
|
7077
6811
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7078
6812
|
result->src[0] = a;
|
7079
|
-
result->src[1] = b;
|
7080
6813
|
|
7081
6814
|
return result;
|
7082
6815
|
}
|
@@ -7098,19 +6831,12 @@ struct ggml_tensor * ggml_clamp(
|
|
7098
6831
|
// TODO: when implement backward, fix this:
|
7099
6832
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
7100
6833
|
|
7101
|
-
|
7102
|
-
|
7103
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
|
7104
|
-
|
7105
|
-
((float *) b->data)[0] = min;
|
7106
|
-
((float *) b->data)[1] = max;
|
7107
|
-
|
7108
|
-
ggml_scratch_load(ctx);
|
6834
|
+
float params[] = { min, max };
|
6835
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7109
6836
|
|
7110
6837
|
result->op = GGML_OP_CLAMP;
|
7111
6838
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7112
6839
|
result->src[0] = a;
|
7113
|
-
result->src[1] = b;
|
7114
6840
|
|
7115
6841
|
return result;
|
7116
6842
|
}
|
@@ -7143,18 +6869,13 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|
7143
6869
|
};
|
7144
6870
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7145
6871
|
|
7146
|
-
|
7147
|
-
|
7148
|
-
((int32_t*)c->data)[0] = s0;
|
7149
|
-
((int32_t*)c->data)[1] = p0;
|
7150
|
-
((int32_t*)c->data)[2] = d0;
|
7151
|
-
ggml_scratch_load(ctx);
|
6872
|
+
int32_t params[] = { s0, p0, d0 };
|
6873
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7152
6874
|
|
7153
6875
|
result->op = GGML_OP_CONV_1D;
|
7154
6876
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7155
6877
|
result->src[0] = a;
|
7156
6878
|
result->src[1] = b;
|
7157
|
-
result->src[2] = c;
|
7158
6879
|
|
7159
6880
|
return result;
|
7160
6881
|
}
|
@@ -7187,21 +6908,13 @@ struct ggml_tensor* ggml_conv_2d(
|
|
7187
6908
|
};
|
7188
6909
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7189
6910
|
|
7190
|
-
|
7191
|
-
|
7192
|
-
((int32_t*)c->data)[0] = s0;
|
7193
|
-
((int32_t*)c->data)[1] = s1;
|
7194
|
-
((int32_t*)c->data)[2] = p0;
|
7195
|
-
((int32_t*)c->data)[3] = p1;
|
7196
|
-
((int32_t*)c->data)[4] = d0;
|
7197
|
-
((int32_t*)c->data)[5] = d1;
|
7198
|
-
ggml_scratch_load(ctx);
|
6911
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
6912
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7199
6913
|
|
7200
6914
|
result->op = GGML_OP_CONV_2D;
|
7201
6915
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7202
6916
|
result->src[0] = a;
|
7203
6917
|
result->src[1] = b;
|
7204
|
-
result->src[2] = c;
|
7205
6918
|
|
7206
6919
|
return result;
|
7207
6920
|
|
@@ -7225,7 +6938,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|
7225
6938
|
return (ins + 2 * p - ks) / s + 1;
|
7226
6939
|
}
|
7227
6940
|
|
7228
|
-
//
|
6941
|
+
// ggml_pool_1d
|
7229
6942
|
|
7230
6943
|
struct ggml_tensor* ggml_pool_1d(
|
7231
6944
|
struct ggml_context * ctx,
|
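`ggml_calc_pool_output_size` in the hunk above is the standard pooling/convolution length formula: with an input length of 10, kernel 3, stride 2 and padding 1 it gives (10 + 2*1 - 3)/2 + 1 = 5. A standalone check with purely illustrative values:

    #include <stdio.h>
    #include <stdint.h>

    static int64_t calc_pool_output_size(int64_t ins, int ks, int s, int p) {
        return (ins + 2 * p - ks) / s + 1;
    }

    int main(void) {
        printf("%lld\n", (long long) calc_pool_output_size(10, 3, 2, 1)); // prints 5
        return 0;
    }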
@@ -7248,18 +6961,12 @@ struct ggml_tensor* ggml_pool_1d(
|
|
7248
6961
|
};
|
7249
6962
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
7250
6963
|
|
7251
|
-
|
7252
|
-
|
7253
|
-
((int32_t*)c->data)[0] = op;
|
7254
|
-
((int32_t*)c->data)[1] = k0;
|
7255
|
-
((int32_t*)c->data)[2] = s0;
|
7256
|
-
((int32_t*)c->data)[3] = p0;
|
7257
|
-
ggml_scratch_load(ctx);
|
6964
|
+
int32_t params[] = { op, k0, s0, p0 };
|
6965
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7258
6966
|
|
7259
6967
|
result->op = GGML_OP_POOL_1D;
|
7260
6968
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7261
6969
|
result->src[0] = a;
|
7262
|
-
result->src[1] = c;
|
7263
6970
|
|
7264
6971
|
return result;
|
7265
6972
|
}
|
@@ -7291,21 +6998,12 @@ struct ggml_tensor* ggml_pool_2d(
|
|
7291
6998
|
};
|
7292
6999
|
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7293
7000
|
|
7294
|
-
|
7295
|
-
|
7296
|
-
((int32_t*)c->data)[0] = op;
|
7297
|
-
((int32_t*)c->data)[1] = k0;
|
7298
|
-
((int32_t*)c->data)[2] = k1;
|
7299
|
-
((int32_t*)c->data)[3] = s0;
|
7300
|
-
((int32_t*)c->data)[4] = s1;
|
7301
|
-
((int32_t*)c->data)[5] = p0;
|
7302
|
-
((int32_t*)c->data)[6] = p1;
|
7303
|
-
ggml_scratch_load(ctx);
|
7001
|
+
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
7002
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7304
7003
|
|
7305
7004
|
result->op = GGML_OP_POOL_2D;
|
7306
7005
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7307
7006
|
result->src[0] = a;
|
7308
|
-
result->src[1] = c;
|
7309
7007
|
|
7310
7008
|
return result;
|
7311
7009
|
}
|
@@ -7328,14 +7026,16 @@ struct ggml_tensor * ggml_flash_attn(
|
|
7328
7026
|
}
|
7329
7027
|
|
7330
7028
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, q);
|
7331
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7029
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, q->n_dims, q->ne);
|
7030
|
+
|
7031
|
+
int32_t t = masked ? 1 : 0;
|
7032
|
+
ggml_set_op_params(result, &t, sizeof(t));
|
7332
7033
|
|
7333
7034
|
result->op = GGML_OP_FLASH_ATTN;
|
7334
7035
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7335
7036
|
result->src[0] = q;
|
7336
7037
|
result->src[1] = k;
|
7337
7038
|
result->src[2] = v;
|
7338
|
-
result->src[3] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7339
7039
|
|
7340
7040
|
return result;
|
7341
7041
|
}
|
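Instead of hanging the masked flag off `src[3]` as a one-element I32 tensor, `ggml_flash_attn` now stores it as the first op parameter. Reading it back on the compute side would presumably look like this (a sketch, not a quote from the diff):

    // builder side, as in the hunk above
    int32_t t = masked ? 1 : 0;
    ggml_set_op_params(result, &t, sizeof(t));

    // compute side: recover the flag from the destination tensor
    const bool is_masked = ((const int32_t *) dst->op_params)[0] != 0;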
@@ -7359,7 +7059,7 @@ struct ggml_tensor * ggml_flash_ff(
|
|
7359
7059
|
}
|
7360
7060
|
|
7361
7061
|
//struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
7362
|
-
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32,
|
7062
|
+
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, a->ne);
|
7363
7063
|
|
7364
7064
|
result->op = GGML_OP_FLASH_FF;
|
7365
7065
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
@@ -7425,13 +7125,15 @@ struct ggml_tensor * ggml_flash_attn_back(
|
|
7425
7125
|
|
7426
7126
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7427
7127
|
|
7128
|
+
int32_t masked_i = masked ? 1 : 0;
|
7129
|
+
ggml_set_op_params(result, &masked_i, sizeof(masked_i));
|
7130
|
+
|
7428
7131
|
result->op = GGML_OP_FLASH_ATTN_BACK;
|
7429
7132
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7430
7133
|
result->src[0] = q;
|
7431
7134
|
result->src[1] = k;
|
7432
7135
|
result->src[2] = v;
|
7433
7136
|
result->src[3] = d;
|
7434
|
-
result->src[4] = ggml_new_i32(ctx, masked ? 1 : 0);
|
7435
7137
|
|
7436
7138
|
return result;
|
7437
7139
|
}
|
@@ -7464,21 +7166,12 @@ struct ggml_tensor * ggml_win_part(
|
|
7464
7166
|
|
7465
7167
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
7466
7168
|
|
7467
|
-
|
7468
|
-
|
7469
|
-
struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
|
7470
|
-
|
7471
|
-
((int32_t *) b->data)[0] = npx;
|
7472
|
-
((int32_t *) b->data)[1] = npy;
|
7473
|
-
((int32_t *) b->data)[2] = w;
|
7474
|
-
|
7475
|
-
ggml_scratch_load(ctx);
|
7169
|
+
int32_t params[] = { npx, npy, w };
|
7170
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7476
7171
|
|
7477
7172
|
result->op = GGML_OP_WIN_PART;
|
7478
7173
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7479
7174
|
result->src[0] = a;
|
7480
|
-
result->src[1] = NULL;
|
7481
|
-
result->src[2] = b;
|
7482
7175
|
|
7483
7176
|
return result;
|
7484
7177
|
}
|
@@ -7503,26 +7196,57 @@ struct ggml_tensor * ggml_win_unpart(
|
|
7503
7196
|
const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
|
7504
7197
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
7505
7198
|
|
7506
|
-
|
7199
|
+
int32_t params[] = { w };
|
7200
|
+
ggml_set_op_params(result, &params, sizeof(params));
|
7507
7201
|
|
7508
|
-
|
7202
|
+
result->op = GGML_OP_WIN_UNPART;
|
7203
|
+
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7204
|
+
result->src[0] = a;
|
7509
7205
|
|
7510
|
-
|
7206
|
+
return result;
|
7207
|
+
}
|
7511
7208
|
|
7512
|
-
|
7209
|
+
// gmml_unary
|
7513
7210
|
|
7514
|
-
|
7211
|
+
static struct ggml_tensor * ggml_unary_impl(
|
7212
|
+
struct ggml_context * ctx,
|
7213
|
+
struct ggml_tensor * a,
|
7214
|
+
enum ggml_unary_op op,
|
7215
|
+
bool inplace) {
|
7216
|
+
bool is_node = false;
|
7217
|
+
|
7218
|
+
if (!inplace && (a->grad)) {
|
7219
|
+
is_node = true;
|
7220
|
+
}
|
7221
|
+
|
7222
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7223
|
+
|
7224
|
+
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
7225
|
+
|
7226
|
+
result->op = GGML_OP_UNARY;
|
7515
7227
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7516
7228
|
result->src[0] = a;
|
7517
|
-
result->src[1] = NULL;
|
7518
|
-
result->src[2] = b;
|
7519
7229
|
|
7520
7230
|
return result;
|
7521
7231
|
}
|
7522
7232
|
|
7233
|
+
struct ggml_tensor * ggml_unary(
|
7234
|
+
struct ggml_context * ctx,
|
7235
|
+
struct ggml_tensor * a,
|
7236
|
+
enum ggml_unary_op op) {
|
7237
|
+
return ggml_unary_impl(ctx, a, op, false);
|
7238
|
+
}
|
7239
|
+
|
7240
|
+
struct ggml_tensor * ggml_unary_inplace(
|
7241
|
+
struct ggml_context * ctx,
|
7242
|
+
struct ggml_tensor * a,
|
7243
|
+
enum ggml_unary_op op) {
|
7244
|
+
return ggml_unary_impl(ctx, a, op, true);
|
7245
|
+
}
|
7246
|
+
|
7523
7247
|
// ggml_map_unary
|
7524
7248
|
|
7525
|
-
struct ggml_tensor * ggml_map_unary_impl_f32(
|
7249
|
+
static struct ggml_tensor * ggml_map_unary_impl_f32(
|
7526
7250
|
struct ggml_context * ctx,
|
7527
7251
|
struct ggml_tensor * a,
|
7528
7252
|
const ggml_unary_op_f32_t fun,
|
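With the new `GGML_OP_UNARY` path above, a single constructor records which unary function to apply as an op parameter, rather than giving each activation its own graph op. A usage sketch (tensor names are illustrative; the enum values appear further down in this diff):

    // explicit form: op id is stored via ggml_set_op_params_i32
    struct ggml_tensor * y = ggml_unary(ctx, x, GGML_UNARY_OP_GELU);

    // in-place variant
    struct ggml_tensor * z = ggml_unary_inplace(ctx, x, GGML_UNARY_OP_RELU);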
@@ -7533,19 +7257,13 @@ struct ggml_tensor * ggml_map_unary_impl_f32(
|
|
7533
7257
|
is_node = true;
|
7534
7258
|
}
|
7535
7259
|
|
7536
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7537
|
-
|
7538
|
-
ggml_scratch_save(ctx);
|
7539
|
-
|
7540
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7541
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7260
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7542
7261
|
|
7543
|
-
|
7262
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7544
7263
|
|
7545
7264
|
result->op = GGML_OP_MAP_UNARY;
|
7546
7265
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7547
7266
|
result->src[0] = a;
|
7548
|
-
result->src[2] = addr_tensor;
|
7549
7267
|
|
7550
7268
|
return result;
|
7551
7269
|
}
|
@@ -7566,7 +7284,7 @@ struct ggml_tensor * ggml_map_unary_inplace_f32(
|
|
7566
7284
|
|
7567
7285
|
// ggml_map_binary
|
7568
7286
|
|
7569
|
-
struct ggml_tensor * ggml_map_binary_impl_f32(
|
7287
|
+
static struct ggml_tensor * ggml_map_binary_impl_f32(
|
7570
7288
|
struct ggml_context * ctx,
|
7571
7289
|
struct ggml_tensor * a,
|
7572
7290
|
struct ggml_tensor * b,
|
@@ -7580,20 +7298,14 @@ struct ggml_tensor * ggml_map_binary_impl_f32(
|
|
7580
7298
|
is_node = true;
|
7581
7299
|
}
|
7582
7300
|
|
7583
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7584
|
-
|
7585
|
-
ggml_scratch_save(ctx);
|
7586
|
-
|
7587
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7588
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7301
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7589
7302
|
|
7590
|
-
|
7303
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7591
7304
|
|
7592
7305
|
result->op = GGML_OP_MAP_BINARY;
|
7593
7306
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7594
7307
|
result->src[0] = a;
|
7595
7308
|
result->src[1] = b;
|
7596
|
-
result->src[2] = addr_tensor;
|
7597
7309
|
|
7598
7310
|
return result;
|
7599
7311
|
}
|
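The map_unary/map_binary builders above now place the callback pointer itself into `op_params` via `ggml_set_op_params(result, (const void *) &fun, sizeof(fun))` instead of allocating an I32 tensor to hold it. The consumer side would then presumably copy the pointer back out the same way; a sketch for the unary case, using the `ggml_unary_op_f32_t` typedef shown above (row pointer names are illustrative):

    // assumed consumer side for GGML_OP_MAP_UNARY; not shown in this hunk
    ggml_unary_op_f32_t fun;
    memcpy(&fun, dst->op_params, sizeof(fun));
    fun(ne0, dst_row, src_row);   // invoke the user callback on one row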
@@ -7616,7 +7328,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|
7616
7328
|
|
7617
7329
|
// ggml_map_custom1
|
7618
7330
|
|
7619
|
-
struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7331
|
+
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
7620
7332
|
struct ggml_context * ctx,
|
7621
7333
|
struct ggml_tensor * a,
|
7622
7334
|
const ggml_custom1_op_f32_t fun,
|
@@ -7627,19 +7339,13 @@ struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|
7627
7339
|
is_node = true;
|
7628
7340
|
}
|
7629
7341
|
|
7630
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7631
|
-
|
7632
|
-
ggml_scratch_save(ctx);
|
7633
|
-
|
7634
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7635
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7342
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7636
7343
|
|
7637
|
-
|
7344
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7638
7345
|
|
7639
7346
|
result->op = GGML_OP_MAP_CUSTOM1;
|
7640
7347
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7641
7348
|
result->src[0] = a;
|
7642
|
-
result->src[2] = addr_tensor;
|
7643
7349
|
|
7644
7350
|
return result;
|
7645
7351
|
}
|
@@ -7660,7 +7366,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|
7660
7366
|
|
7661
7367
|
// ggml_map_custom2
|
7662
7368
|
|
7663
|
-
struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7369
|
+
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
7664
7370
|
struct ggml_context * ctx,
|
7665
7371
|
struct ggml_tensor * a,
|
7666
7372
|
struct ggml_tensor * b,
|
@@ -7672,20 +7378,14 @@ struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|
7672
7378
|
is_node = true;
|
7673
7379
|
}
|
7674
7380
|
|
7675
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7676
|
-
|
7677
|
-
ggml_scratch_save(ctx);
|
7678
|
-
|
7679
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7680
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7381
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7681
7382
|
|
7682
|
-
|
7383
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7683
7384
|
|
7684
7385
|
result->op = GGML_OP_MAP_CUSTOM2;
|
7685
7386
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7686
7387
|
result->src[0] = a;
|
7687
7388
|
result->src[1] = b;
|
7688
|
-
result->src[2] = addr_tensor;
|
7689
7389
|
|
7690
7390
|
return result;
|
7691
7391
|
}
|
@@ -7708,7 +7408,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|
7708
7408
|
|
7709
7409
|
// ggml_map_custom3
|
7710
7410
|
|
7711
|
-
struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7411
|
+
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
7712
7412
|
struct ggml_context * ctx,
|
7713
7413
|
struct ggml_tensor * a,
|
7714
7414
|
struct ggml_tensor * b,
|
@@ -7721,21 +7421,15 @@ struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|
7721
7421
|
is_node = true;
|
7722
7422
|
}
|
7723
7423
|
|
7724
|
-
struct ggml_tensor *result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7725
|
-
|
7726
|
-
ggml_scratch_save(ctx);
|
7727
|
-
|
7728
|
-
struct ggml_tensor * addr_tensor = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(void *) / sizeof(int32_t));
|
7729
|
-
*((void (**)(void))addr_tensor->data) = (void (*)(void))fun;
|
7424
|
+
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
7730
7425
|
|
7731
|
-
|
7426
|
+
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
7732
7427
|
|
7733
7428
|
result->op = GGML_OP_MAP_CUSTOM3;
|
7734
7429
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
7735
7430
|
result->src[0] = a;
|
7736
7431
|
result->src[1] = b;
|
7737
|
-
result->src[2] =
|
7738
|
-
result->src[3] = c;
|
7432
|
+
result->src[2] = c;
|
7739
7433
|
|
7740
7434
|
return result;
|
7741
7435
|
}
|
@@ -8963,21 +8657,17 @@ static void ggml_compute_forward_acc_f32(
|
|
8963
8657
|
const struct ggml_compute_params * params,
|
8964
8658
|
const struct ggml_tensor * src0,
|
8965
8659
|
const struct ggml_tensor * src1,
|
8966
|
-
const struct ggml_tensor * opt0,
|
8967
8660
|
struct ggml_tensor * dst) {
|
8968
8661
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
8969
8662
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
8970
8663
|
|
8971
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
8972
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
8973
|
-
|
8974
8664
|
// view src0 and dst with these strides and data offset inbytes during acc
|
8975
8665
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
8976
|
-
size_t nb1 = ((int32_t *)
|
8977
|
-
size_t nb2 = ((int32_t *)
|
8978
|
-
size_t nb3 = ((int32_t *)
|
8979
|
-
size_t offset = ((int32_t *)
|
8980
|
-
bool inplace = (bool) ((int32_t *)
|
8666
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
8667
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
8668
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
8669
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
8670
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
8981
8671
|
|
8982
8672
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
8983
8673
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -9046,13 +8736,12 @@ static void ggml_compute_forward_acc(
|
|
9046
8736
|
const struct ggml_compute_params * params,
|
9047
8737
|
const struct ggml_tensor * src0,
|
9048
8738
|
const struct ggml_tensor * src1,
|
9049
|
-
const struct ggml_tensor * opt0,
|
9050
8739
|
struct ggml_tensor * dst) {
|
9051
8740
|
|
9052
8741
|
switch (src0->type) {
|
9053
8742
|
case GGML_TYPE_F32:
|
9054
8743
|
{
|
9055
|
-
ggml_compute_forward_acc_f32(params, src0, src1,
|
8744
|
+
ggml_compute_forward_acc_f32(params, src0, src1, dst);
|
9056
8745
|
} break;
|
9057
8746
|
case GGML_TYPE_F16:
|
9058
8747
|
case GGML_TYPE_Q4_0:
|
@@ -9484,7 +9173,7 @@ static void ggml_compute_forward_sum_f32(
|
|
9484
9173
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9485
9174
|
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9486
9175
|
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9487
|
-
|
9176
|
+
ggml_vec_sum_f32_ggf(ne00,
|
9488
9177
|
&row_sum,
|
9489
9178
|
(float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
|
9490
9179
|
sum += row_sum;
|
@@ -9494,6 +9183,38 @@ static void ggml_compute_forward_sum_f32(
|
|
9494
9183
|
((float *) dst->data)[0] = sum;
|
9495
9184
|
}
|
9496
9185
|
|
9186
|
+
static void ggml_compute_forward_sum_f16(
|
9187
|
+
const struct ggml_compute_params * params,
|
9188
|
+
const struct ggml_tensor * src0,
|
9189
|
+
struct ggml_tensor * dst) {
|
9190
|
+
assert(params->ith == 0);
|
9191
|
+
assert(ggml_is_scalar(dst));
|
9192
|
+
|
9193
|
+
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
9194
|
+
return;
|
9195
|
+
}
|
9196
|
+
|
9197
|
+
assert(src0->nb[0] == sizeof(ggml_fp16_t));
|
9198
|
+
|
9199
|
+
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
9200
|
+
GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
|
9201
|
+
|
9202
|
+
float sum = 0;
|
9203
|
+
float row_sum = 0;
|
9204
|
+
|
9205
|
+
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
9206
|
+
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
9207
|
+
for (int64_t i01 = 0; i01 < ne01; i01++) {
|
9208
|
+
ggml_vec_sum_f16_ggf(ne00,
|
9209
|
+
&row_sum,
|
9210
|
+
(ggml_fp16_t *) ((char *) src0->data + i01 * nb01 + i02 * nb02 + i03 * nb03));
|
9211
|
+
sum += row_sum;
|
9212
|
+
}
|
9213
|
+
}
|
9214
|
+
}
|
9215
|
+
((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum);
|
9216
|
+
}
|
9217
|
+
|
9497
9218
|
static void ggml_compute_forward_sum(
|
9498
9219
|
const struct ggml_compute_params * params,
|
9499
9220
|
const struct ggml_tensor * src0,
|
@@ -9503,6 +9224,10 @@ static void ggml_compute_forward_sum(
|
|
9503
9224
|
{
|
9504
9225
|
ggml_compute_forward_sum_f32(params, src0, dst);
|
9505
9226
|
} break;
|
9227
|
+
case GGML_TYPE_F16:
|
9228
|
+
{
|
9229
|
+
ggml_compute_forward_sum_f16(params, src0, dst);
|
9230
|
+
} break;
|
9506
9231
|
default:
|
9507
9232
|
{
|
9508
9233
|
GGML_ASSERT(false);
|
@@ -10098,8 +9823,8 @@ static void ggml_compute_forward_gelu_f32(
|
|
10098
9823
|
const struct ggml_compute_params * params,
|
10099
9824
|
const struct ggml_tensor * src0,
|
10100
9825
|
struct ggml_tensor * dst) {
|
10101
|
-
GGML_ASSERT(
|
10102
|
-
GGML_ASSERT(
|
9826
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9827
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10103
9828
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10104
9829
|
|
10105
9830
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10157,8 +9882,8 @@ static void ggml_compute_forward_gelu_quick_f32(
|
|
10157
9882
|
const struct ggml_compute_params * params,
|
10158
9883
|
const struct ggml_tensor * src0,
|
10159
9884
|
struct ggml_tensor * dst) {
|
10160
|
-
GGML_ASSERT(
|
10161
|
-
GGML_ASSERT(
|
9885
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9886
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10162
9887
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10163
9888
|
|
10164
9889
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10216,8 +9941,8 @@ static void ggml_compute_forward_silu_f32(
|
|
10216
9941
|
const struct ggml_compute_params * params,
|
10217
9942
|
const struct ggml_tensor * src0,
|
10218
9943
|
struct ggml_tensor * dst) {
|
10219
|
-
GGML_ASSERT(
|
10220
|
-
GGML_ASSERT(
|
9944
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
9945
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10221
9946
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10222
9947
|
|
10223
9948
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
@@ -10269,7 +9994,6 @@ static void ggml_compute_forward_silu(
|
|
10269
9994
|
}
|
10270
9995
|
}
|
10271
9996
|
|
10272
|
-
|
10273
9997
|
// ggml_compute_forward_silu_back
|
10274
9998
|
|
10275
9999
|
static void ggml_compute_forward_silu_back_f32(
|
@@ -10277,9 +10001,9 @@ static void ggml_compute_forward_silu_back_f32(
|
|
10277
10001
|
const struct ggml_tensor * src0,
|
10278
10002
|
const struct ggml_tensor * grad,
|
10279
10003
|
struct ggml_tensor * dst) {
|
10280
|
-
GGML_ASSERT(
|
10281
|
-
GGML_ASSERT(
|
10282
|
-
GGML_ASSERT(
|
10004
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(grad));
|
10005
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(src0));
|
10006
|
+
GGML_ASSERT(ggml_is_contiguous_except_dim_1(dst));
|
10283
10007
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
10284
10008
|
GGML_ASSERT(ggml_are_same_shape(src0, grad));
|
10285
10009
|
|
@@ -10419,7 +10143,8 @@ static void ggml_compute_forward_rms_norm_f32(
|
|
10419
10143
|
|
10420
10144
|
GGML_TENSOR_UNARY_OP_LOCALS;
|
10421
10145
|
|
10422
|
-
|
10146
|
+
float eps;
|
10147
|
+
memcpy(&eps, dst->op_params, sizeof(float));
|
10423
10148
|
|
10424
10149
|
// TODO: optimize
|
10425
10150
|
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
@@ -10684,6 +10409,8 @@ static void ggml_compute_forward_mul_mat(
|
|
10684
10409
|
|
10685
10410
|
const enum ggml_type type = src0->type;
|
10686
10411
|
|
10412
|
+
const bool src1_cont = ggml_is_contiguous(src1);
|
10413
|
+
|
10687
10414
|
ggml_vec_dot_t const vec_dot = type_traits[type].vec_dot;
|
10688
10415
|
enum ggml_type const vec_dot_type = type_traits[type].vec_dot_type;
|
10689
10416
|
ggml_from_float_t const from_float_to_vec_dot = type_traits[vec_dot_type].from_float;
|
@@ -10747,7 +10474,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10747
10474
|
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
10748
10475
|
|
10749
10476
|
if (type != GGML_TYPE_F32) {
|
10750
|
-
|
10477
|
+
float * const wdata = params->wdata;
|
10751
10478
|
ggml_to_float_t const to_float = type_traits[type].to_float;
|
10752
10479
|
|
10753
10480
|
size_t id = 0;
|
@@ -10805,7 +10532,7 @@ static void ggml_compute_forward_mul_mat(
|
|
10805
10532
|
// src1 rows
|
10806
10533
|
const int64_t nr1 = ne11*ne12*ne13;
|
10807
10534
|
|
10808
|
-
void * wdata
|
10535
|
+
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
10809
10536
|
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
|
10810
10537
|
|
10811
10538
|
for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
|
@@ -10828,7 +10555,15 @@ static void ggml_compute_forward_mul_mat(
|
|
10828
10555
|
const int64_t i3 = i13;
|
10829
10556
|
|
10830
10557
|
const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
|
10831
|
-
|
10558
|
+
|
10559
|
+
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
10560
|
+
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
10561
|
+
// the original src1 data pointer, so we should index using the indices directly
|
10562
|
+
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
10563
|
+
const char * src1_col = (const char *) wdata +
|
10564
|
+
(src1_cont || src1->type != vec_dot_type
|
10565
|
+
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
10566
|
+
: (i11*nb11 + i12*nb12 + i13*nb13));
|
10832
10567
|
|
10833
10568
|
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
10834
10569
|
|
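The comment in the hunk above distinguishes two layouts for the `src1` data during mat-mul: if the rows were converted or are already in the vec_dot type they sit back to back, so a flat row index times `row_size` is enough; otherwise the original strided `src1` buffer is read and its byte strides must be used. A condensed sketch of the two offset computations, with the same variable names as the hunk:

    // packed / converted case: rows laid out contiguously in wdata (or src1->data)
    const size_t off_packed  = (i11 + i12*ne11 + i13*ne12*ne11) * row_size;

    // strided case: index the original src1 buffer through its nb strides
    const size_t off_strided = i11*nb11 + i12*nb12 + i13*nb13;

    const char * src1_col = (const char *) wdata +
        (src1_cont || src1->type != vec_dot_type ? off_packed : off_strided);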
@@ -11062,21 +10797,17 @@ static void ggml_compute_forward_set_f32(
|
|
11062
10797
|
const struct ggml_compute_params * params,
|
11063
10798
|
const struct ggml_tensor * src0,
|
11064
10799
|
const struct ggml_tensor * src1,
|
11065
|
-
const struct ggml_tensor * opt0,
|
11066
10800
|
struct ggml_tensor * dst) {
|
11067
10801
|
GGML_ASSERT(ggml_are_same_shape(src0, dst));
|
11068
10802
|
GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
|
11069
10803
|
|
11070
|
-
GGML_ASSERT(opt0->type == GGML_TYPE_I32);
|
11071
|
-
GGML_ASSERT(ggml_nelements(opt0) == 5);
|
11072
|
-
|
11073
10804
|
// view src0 and dst with these strides and data offset inbytes during set
|
11074
10805
|
// nb0 is implicitely element_size because src0 and dst are contiguous
|
11075
|
-
size_t nb1 = ((int32_t *)
|
11076
|
-
size_t nb2 = ((int32_t *)
|
11077
|
-
size_t nb3 = ((int32_t *)
|
11078
|
-
size_t offset = ((int32_t *)
|
11079
|
-
bool inplace = (bool) ((int32_t *)
|
10806
|
+
size_t nb1 = ((int32_t *) dst->op_params)[0];
|
10807
|
+
size_t nb2 = ((int32_t *) dst->op_params)[1];
|
10808
|
+
size_t nb3 = ((int32_t *) dst->op_params)[2];
|
10809
|
+
size_t offset = ((int32_t *) dst->op_params)[3];
|
10810
|
+
bool inplace = (bool) ((int32_t *) dst->op_params)[4];
|
11080
10811
|
|
11081
10812
|
if (!inplace && (params->type == GGML_TASK_INIT)) {
|
11082
10813
|
// memcpy needs to be synchronized across threads to avoid race conditions.
|
@@ -11136,13 +10867,12 @@ static void ggml_compute_forward_set(
|
|
11136
10867
|
const struct ggml_compute_params * params,
|
11137
10868
|
const struct ggml_tensor * src0,
|
11138
10869
|
const struct ggml_tensor * src1,
|
11139
|
-
const struct ggml_tensor * opt0,
|
11140
10870
|
struct ggml_tensor * dst) {
|
11141
10871
|
|
11142
10872
|
switch (src0->type) {
|
11143
10873
|
case GGML_TYPE_F32:
|
11144
10874
|
{
|
11145
|
-
ggml_compute_forward_set_f32(params, src0, src1,
|
10875
|
+
ggml_compute_forward_set_f32(params, src0, src1, dst);
|
11146
10876
|
} break;
|
11147
10877
|
case GGML_TYPE_F16:
|
11148
10878
|
case GGML_TYPE_Q4_0:
|
@@ -11538,17 +11268,14 @@ static void ggml_compute_forward_diag(
|
|
11538
11268
|
static void ggml_compute_forward_diag_mask_f32(
|
11539
11269
|
const struct ggml_compute_params * params,
|
11540
11270
|
const struct ggml_tensor * src0,
|
11541
|
-
const struct ggml_tensor * src1,
|
11542
11271
|
struct ggml_tensor * dst,
|
11543
11272
|
const float value) {
|
11544
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11545
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11546
11273
|
|
11547
11274
|
const int ith = params->ith;
|
11548
11275
|
const int nth = params->nth;
|
11549
11276
|
|
11550
|
-
const int n_past = ((int32_t *)
|
11551
|
-
const bool inplace = (bool)((int32_t *)
|
11277
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11278
|
+
const bool inplace = (bool)((int32_t *) dst->op_params)[1];
|
11552
11279
|
|
11553
11280
|
GGML_ASSERT(n_past >= 0);
|
11554
11281
|
|
@@ -11591,12 +11318,11 @@ static void ggml_compute_forward_diag_mask_f32(
|
|
11591
11318
|
static void ggml_compute_forward_diag_mask_inf(
|
11592
11319
|
const struct ggml_compute_params * params,
|
11593
11320
|
const struct ggml_tensor * src0,
|
11594
|
-
const struct ggml_tensor * src1,
|
11595
11321
|
struct ggml_tensor * dst) {
|
11596
11322
|
switch (src0->type) {
|
11597
11323
|
case GGML_TYPE_F32:
|
11598
11324
|
{
|
11599
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11325
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, -INFINITY);
|
11600
11326
|
} break;
|
11601
11327
|
default:
|
11602
11328
|
{
|
@@ -11608,12 +11334,11 @@ static void ggml_compute_forward_diag_mask_inf(
|
|
11608
11334
|
static void ggml_compute_forward_diag_mask_zero(
|
11609
11335
|
const struct ggml_compute_params * params,
|
11610
11336
|
const struct ggml_tensor * src0,
|
11611
|
-
const struct ggml_tensor * src1,
|
11612
11337
|
struct ggml_tensor * dst) {
|
11613
11338
|
switch (src0->type) {
|
11614
11339
|
case GGML_TYPE_F32:
|
11615
11340
|
{
|
11616
|
-
ggml_compute_forward_diag_mask_f32(params, src0,
|
11341
|
+
ggml_compute_forward_diag_mask_f32(params, src0, dst, 0);
|
11617
11342
|
} break;
|
11618
11343
|
default:
|
11619
11344
|
{
|
@@ -11811,20 +11536,17 @@ static void ggml_compute_forward_soft_max_back(
|
|
11811
11536
|
static void ggml_compute_forward_alibi_f32(
|
11812
11537
|
const struct ggml_compute_params * params,
|
11813
11538
|
const struct ggml_tensor * src0,
|
11814
|
-
const struct ggml_tensor * src1,
|
11815
11539
|
struct ggml_tensor * dst) {
|
11816
11540
|
assert(params->ith == 0);
|
11817
11541
|
|
11818
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11819
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11820
|
-
|
11821
11542
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11822
11543
|
return;
|
11823
11544
|
}
|
11824
11545
|
|
11825
|
-
const int
|
11826
|
-
const int
|
11827
|
-
|
11546
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11547
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11548
|
+
float max_bias;
|
11549
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11828
11550
|
|
11829
11551
|
assert(n_past >= 0);
|
11830
11552
|
|
@@ -11877,20 +11599,17 @@ static void ggml_compute_forward_alibi_f32(
|
|
11877
11599
|
static void ggml_compute_forward_alibi_f16(
|
11878
11600
|
const struct ggml_compute_params * params,
|
11879
11601
|
const struct ggml_tensor * src0,
|
11880
|
-
const struct ggml_tensor * src1,
|
11881
11602
|
struct ggml_tensor * dst) {
|
11882
11603
|
assert(params->ith == 0);
|
11883
11604
|
|
11884
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
11885
|
-
GGML_ASSERT(ggml_nelements(src1) == 3);
|
11886
|
-
|
11887
11605
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11888
11606
|
return;
|
11889
11607
|
}
|
11890
11608
|
|
11891
|
-
const int
|
11892
|
-
const int
|
11893
|
-
|
11609
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11610
|
+
const int n_head = ((int32_t *) dst->op_params)[1];
|
11611
|
+
float max_bias;
|
11612
|
+
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
11894
11613
|
|
11895
11614
|
assert(n_past >= 0);
|
11896
11615
|
|
@@ -11943,16 +11662,15 @@ static void ggml_compute_forward_alibi_f16(
|
|
11943
11662
|
static void ggml_compute_forward_alibi(
|
11944
11663
|
const struct ggml_compute_params * params,
|
11945
11664
|
const struct ggml_tensor * src0,
|
11946
|
-
const struct ggml_tensor * src1,
|
11947
11665
|
struct ggml_tensor * dst) {
|
11948
11666
|
switch (src0->type) {
|
11949
11667
|
case GGML_TYPE_F16:
|
11950
11668
|
{
|
11951
|
-
ggml_compute_forward_alibi_f16(params, src0,
|
11669
|
+
ggml_compute_forward_alibi_f16(params, src0, dst);
|
11952
11670
|
} break;
|
11953
11671
|
case GGML_TYPE_F32:
|
11954
11672
|
{
|
11955
|
-
ggml_compute_forward_alibi_f32(params, src0,
|
11673
|
+
ggml_compute_forward_alibi_f32(params, src0, dst);
|
11956
11674
|
} break;
|
11957
11675
|
case GGML_TYPE_Q4_0:
|
11958
11676
|
case GGML_TYPE_Q4_1:
|
@@ -11982,19 +11700,17 @@ static void ggml_compute_forward_alibi(
|
|
11982
11700
|
static void ggml_compute_forward_clamp_f32(
|
11983
11701
|
const struct ggml_compute_params * params,
|
11984
11702
|
const struct ggml_tensor * src0,
|
11985
|
-
const struct ggml_tensor * src1,
|
11986
11703
|
struct ggml_tensor * dst) {
|
11987
11704
|
assert(params->ith == 0);
|
11988
11705
|
|
11989
|
-
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
11990
|
-
GGML_ASSERT(ggml_nelements(src1) == 2);
|
11991
|
-
|
11992
11706
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11993
11707
|
return;
|
11994
11708
|
}
|
11995
11709
|
|
11996
|
-
|
11997
|
-
|
11710
|
+
float min;
|
11711
|
+
float max;
|
11712
|
+
memcpy(&min, (float *) dst->op_params + 0, sizeof(float));
|
11713
|
+
memcpy(&max, (float *) dst->op_params + 1, sizeof(float));
|
11998
11714
|
|
11999
11715
|
const int ith = params->ith;
|
12000
11716
|
const int nth = params->nth;
|
@@ -12024,12 +11740,11 @@ static void ggml_compute_forward_clamp_f32(
|
|
12024
11740
|
static void ggml_compute_forward_clamp(
|
12025
11741
|
const struct ggml_compute_params * params,
|
12026
11742
|
const struct ggml_tensor * src0,
|
12027
|
-
const struct ggml_tensor * src1,
|
12028
11743
|
struct ggml_tensor * dst) {
|
12029
11744
|
switch (src0->type) {
|
12030
11745
|
case GGML_TYPE_F32:
|
12031
11746
|
{
|
12032
|
-
ggml_compute_forward_clamp_f32(params, src0,
|
11747
|
+
ggml_compute_forward_clamp_f32(params, src0, dst);
|
12033
11748
|
} break;
|
12034
11749
|
case GGML_TYPE_F16:
|
12035
11750
|
case GGML_TYPE_Q4_0:
|
@@ -12059,19 +11774,21 @@ static void ggml_compute_forward_clamp(
|
|
12059
11774
|
static void ggml_compute_forward_rope_f32(
|
12060
11775
|
const struct ggml_compute_params * params,
|
12061
11776
|
const struct ggml_tensor * src0,
|
12062
|
-
const struct ggml_tensor * src1,
|
12063
11777
|
struct ggml_tensor * dst) {
|
12064
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12065
|
-
GGML_ASSERT(ggml_nelements(src1) == 4);
|
12066
11778
|
|
12067
11779
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12068
11780
|
return;
|
12069
11781
|
}
|
12070
11782
|
|
12071
|
-
|
12072
|
-
|
12073
|
-
|
12074
|
-
const int
|
11783
|
+
float freq_base;
|
11784
|
+
float freq_scale;
|
11785
|
+
|
11786
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11787
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11788
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11789
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11790
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11791
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12075
11792
|
|
12076
11793
|
assert(n_past >= 0);
|
12077
11794
|
|
@@ -12100,7 +11817,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12100
11817
|
// row index used to determine which thread to use
|
12101
11818
|
int ir = 0;
|
12102
11819
|
|
12103
|
-
const float theta_scale = powf(
|
11820
|
+
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
12104
11821
|
|
12105
11822
|
const bool is_neox = mode & 2;
|
12106
11823
|
const bool is_glm = mode & 4;
|
@@ -12112,7 +11829,7 @@ static void ggml_compute_forward_rope_f32(
|
|
12112
11829
|
if (ir++ < ir0) continue;
|
12113
11830
|
if (ir > ir1) break;
|
12114
11831
|
|
12115
|
-
float theta = (float)p;
|
11832
|
+
float theta = freq_scale * (float)p;
|
12116
11833
|
|
12117
11834
|
if (is_glm) {
|
12118
11835
|
theta = MIN(p, n_ctx - 2);
|
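The two changes above fold the new parameters into the rotation angles: the per-dimension decay becomes freq_base^(-2/n_dims) instead of the hard-coded 10000, and the starting angle for position p is multiplied by freq_scale. Each rotated pair i therefore uses theta_i = freq_scale * p * freq_base^(-2i/n_dims). A small sketch of the loop structure, simplified from the surrounding code:

    const float theta_scale = powf(freq_base, -2.0f/n_dims);

    float theta = freq_scale * (float) p;          // position p, scaled
    for (int i0 = 0; i0 < n_dims; i0 += 2) {
        const float cos_theta = cosf(theta);
        const float sin_theta = sinf(theta);
        theta *= theta_scale;                      // advance to the next pair
        // rotate the pair (x0, x1):
        // dst[0] = x0*cos_theta - x1*sin_theta;
        // dst[1] = x0*sin_theta + x1*cos_theta;
    }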
@@ -12186,19 +11903,21 @@ static void ggml_compute_forward_rope_f32(
|
|
12186
11903
|
static void ggml_compute_forward_rope_f16(
|
12187
11904
|
const struct ggml_compute_params * params,
|
12188
11905
|
const struct ggml_tensor * src0,
|
12189
|
-
const struct ggml_tensor * src1,
|
12190
11906
|
struct ggml_tensor * dst) {
|
12191
|
-
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
12192
|
-
GGML_ASSERT(ggml_nelements(src1) == 4);
|
12193
11907
|
|
12194
11908
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12195
11909
|
return;
|
12196
11910
|
}
|
12197
11911
|
|
12198
|
-
|
12199
|
-
|
12200
|
-
|
12201
|
-
const int
|
11912
|
+
float freq_base;
|
11913
|
+
float freq_scale;
|
11914
|
+
|
11915
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
11916
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11917
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
11918
|
+
const int n_ctx = ((int32_t *) dst->op_params)[3];
|
11919
|
+
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11920
|
+
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
12202
11921
|
|
12203
11922
|
assert(n_past >= 0);
|
12204
11923
|
|
@@ -12227,7 +11946,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12227
11946
|
// row index used to determine which thread to use
|
12228
11947
|
int ir = 0;
|
12229
11948
|
|
12230
|
-
const float theta_scale = powf(
|
11949
|
+
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
12231
11950
|
|
12232
11951
|
const bool is_neox = mode & 2;
|
12233
11952
|
const bool is_glm = mode & 4;
|
@@ -12239,7 +11958,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12239
11958
|
if (ir++ < ir0) continue;
|
12240
11959
|
if (ir > ir1) break;
|
12241
11960
|
|
12242
|
-
float theta = (float)p;
|
11961
|
+
float theta = freq_scale * (float)p;
|
12243
11962
|
|
12244
11963
|
if (is_glm) {
|
12245
11964
|
theta = MIN(p, n_ctx - 2);
|
@@ -12300,7 +12019,7 @@ static void ggml_compute_forward_rope_f16(
|
|
12300
12019
|
const float x0 = GGML_FP16_TO_FP32(src[0]);
|
12301
12020
|
const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]);
|
12302
12021
|
|
12303
|
-
dst_data[0]
|
12022
|
+
dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
12304
12023
|
dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
12305
12024
|
}
|
12306
12025
|
}
|
@@ -12313,16 +12032,15 @@ static void ggml_compute_forward_rope_f16(
|
|
12313
12032
|
static void ggml_compute_forward_rope(
|
12314
12033
|
const struct ggml_compute_params * params,
|
12315
12034
|
const struct ggml_tensor * src0,
|
12316
|
-
const struct ggml_tensor * src1,
|
12317
12035
|
struct ggml_tensor * dst) {
|
12318
12036
|
switch (src0->type) {
|
12319
12037
|
case GGML_TYPE_F16:
|
12320
12038
|
{
|
12321
|
-
ggml_compute_forward_rope_f16(params, src0,
|
12039
|
+
ggml_compute_forward_rope_f16(params, src0, dst);
|
12322
12040
|
} break;
|
12323
12041
|
case GGML_TYPE_F32:
|
12324
12042
|
{
|
12325
|
-
ggml_compute_forward_rope_f32(params, src0,
|
12043
|
+
ggml_compute_forward_rope_f32(params, src0, dst);
|
12326
12044
|
} break;
|
12327
12045
|
default:
|
12328
12046
|
{
|
@@ -12336,10 +12054,7 @@ static void ggml_compute_forward_rope(
|
|
12336
12054
|
static void ggml_compute_forward_rope_back_f32(
|
12337
12055
|
const struct ggml_compute_params * params,
|
12338
12056
|
const struct ggml_tensor * src0,
|
12339
|
-
const struct ggml_tensor * src1,
|
12340
12057
|
struct ggml_tensor * dst) {
|
12341
|
-
assert(src1->type == GGML_TYPE_I32);
|
12342
|
-
assert(ggml_nelements(src1) == 3);
|
12343
12058
|
|
12344
12059
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12345
12060
|
return;
|
@@ -12349,9 +12064,9 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12349
12064
|
// dx = rope_back(dy, src1)
|
12350
12065
|
// src0 is dy, src1 contains options
|
12351
12066
|
|
12352
|
-
const int n_past = ((int32_t *)
|
12353
|
-
const int n_dims = ((int32_t *)
|
12354
|
-
const int mode = ((int32_t *)
|
12067
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12068
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12069
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12355
12070
|
|
12356
12071
|
assert(n_past >= 0);
|
12357
12072
|
|
@@ -12435,10 +12150,7 @@ static void ggml_compute_forward_rope_back_f32(
|
|
12435
12150
|
static void ggml_compute_forward_rope_back_f16(
|
12436
12151
|
const struct ggml_compute_params * params,
|
12437
12152
|
const struct ggml_tensor * src0,
|
12438
|
-
const struct ggml_tensor * src1,
|
12439
12153
|
struct ggml_tensor * dst) {
|
12440
|
-
assert(src1->type == GGML_TYPE_I32);
|
12441
|
-
assert(ggml_nelements(src1) == 3);
|
12442
12154
|
|
12443
12155
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
12444
12156
|
return;
|
@@ -12448,9 +12160,9 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12448
12160
|
// dx = rope_back(dy, src1)
|
12449
12161
|
// src0 is dy, src1 contains options
|
12450
12162
|
|
12451
|
-
const int n_past = ((int32_t *)
|
12452
|
-
const int n_dims = ((int32_t *)
|
12453
|
-
const int mode = ((int32_t *)
|
12163
|
+
const int n_past = ((int32_t *) dst->op_params)[0];
|
12164
|
+
const int n_dims = ((int32_t *) dst->op_params)[1];
|
12165
|
+
const int mode = ((int32_t *) dst->op_params)[2];
|
12454
12166
|
|
12455
12167
|
assert(n_past >= 0);
|
12456
12168
|
|
@@ -12534,16 +12246,15 @@ static void ggml_compute_forward_rope_back_f16(
|
|
12534
12246
|
static void ggml_compute_forward_rope_back(
|
12535
12247
|
const struct ggml_compute_params * params,
|
12536
12248
|
const struct ggml_tensor * src0,
|
12537
|
-
const struct ggml_tensor * src1,
|
12538
12249
|
struct ggml_tensor * dst) {
|
12539
12250
|
switch (src0->type) {
|
12540
12251
|
case GGML_TYPE_F16:
|
12541
12252
|
{
|
12542
|
-
ggml_compute_forward_rope_back_f16(params, src0,
|
12253
|
+
ggml_compute_forward_rope_back_f16(params, src0, dst);
|
12543
12254
|
} break;
|
12544
12255
|
case GGML_TYPE_F32:
|
12545
12256
|
{
|
12546
|
-
ggml_compute_forward_rope_back_f32(params, src0,
|
12257
|
+
ggml_compute_forward_rope_back_f32(params, src0, dst);
|
12547
12258
|
} break;
|
12548
12259
|
default:
|
12549
12260
|
{
|
@@ -12740,7 +12451,7 @@ static void ggml_compute_forward_conv_1d_s1_ph(
|
|
12740
12451
|
const struct ggml_compute_params * params,
|
12741
12452
|
const struct ggml_tensor * src0,
|
12742
12453
|
const struct ggml_tensor * src1,
|
12743
|
-
|
12454
|
+
struct ggml_tensor * dst) {
|
12744
12455
|
switch (src0->type) {
|
12745
12456
|
case GGML_TYPE_F16:
|
12746
12457
|
{
|
@@ -12943,7 +12654,7 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12943
12654
|
const struct ggml_compute_params * params,
|
12944
12655
|
const struct ggml_tensor * src0,
|
12945
12656
|
const struct ggml_tensor * src1,
|
12946
|
-
|
12657
|
+
struct ggml_tensor * dst) {
|
12947
12658
|
switch (src0->type) {
|
12948
12659
|
case GGML_TYPE_F16:
|
12949
12660
|
{
|
@@ -12963,14 +12674,13 @@ static void ggml_compute_forward_conv_1d_s2_ph(
|
|
12963
12674
|
// ggml_compute_forward_conv_1d
|
12964
12675
|
|
12965
12676
|
static void ggml_compute_forward_conv_1d(
|
12966
|
-
|
12967
|
-
|
12968
|
-
|
12969
|
-
|
12970
|
-
|
12971
|
-
const int32_t
|
12972
|
-
const int32_t
|
12973
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[2];
|
12677
|
+
const struct ggml_compute_params * params,
|
12678
|
+
const struct ggml_tensor * src0,
|
12679
|
+
const struct ggml_tensor * src1,
|
12680
|
+
struct ggml_tensor * dst) {
|
12681
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12682
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[1];
|
12683
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[2];
|
12974
12684
|
GGML_ASSERT(d0 == 1); // dilation not supported
|
12975
12685
|
GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
|
12976
12686
|
if (s0 == 1) {
|
@@ -12982,9 +12692,9 @@ static void ggml_compute_forward_conv_1d(
|
|
12982
12692
|
};
|
12983
12693
|
}
|
12984
12694
|
|
12985
|
-
//
|
12695
|
+
// ggml_compute_forward_conv_2d
|
12986
12696
|
|
12987
|
-
static void
|
12697
|
+
static void ggml_compute_forward_conv_2d_f16_f32(
|
12988
12698
|
const struct ggml_compute_params * params,
|
12989
12699
|
const struct ggml_tensor * src0,
|
12990
12700
|
const struct ggml_tensor * src1,
|
@@ -13007,28 +12717,37 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
|
|
13007
12717
|
// size of the convolution row - the kernel size unrolled across all channels
|
13008
12718
|
const int ew0 = nk0*nk1*ne02;
|
13009
12719
|
|
12720
|
+
const int32_t s0 = ((const int32_t*)(dst->op_params))[0];
|
12721
|
+
const int32_t s1 = ((const int32_t*)(dst->op_params))[1];
|
12722
|
+
const int32_t p0 = ((const int32_t*)(dst->op_params))[2];
|
12723
|
+
const int32_t p1 = ((const int32_t*)(dst->op_params))[3];
|
12724
|
+
const int32_t d0 = ((const int32_t*)(dst->op_params))[4];
|
12725
|
+
const int32_t d1 = ((const int32_t*)(dst->op_params))[5];
|
12726
|
+
|
13010
12727
|
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
|
13011
12728
|
GGML_ASSERT(nb10 == sizeof(float));
|
13012
12729
|
|
13013
12730
|
if (params->type == GGML_TASK_INIT) {
|
13014
|
-
// TODO: fix this memset (wsize is overestimated)
|
13015
12731
|
memset(params->wdata, 0, params->wsize);
|
13016
12732
|
|
13017
12733
|
// prepare source data (src1)
|
13018
12734
|
{
|
13019
12735
|
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
|
13020
12736
|
|
13021
|
-
for (int
|
13022
|
-
|
13023
|
-
|
13024
|
-
|
12737
|
+
for (int i12 = 0; i12 < ne12; i12++) {
|
12738
|
+
const float * const src = (float *)((char *) src1->data + i12*nb12);
|
12739
|
+
ggml_fp16_t * dst_data = wdata;
|
12740
|
+
|
12741
|
+
for (int i1 = 0; i1 < ne1; i1++) {
|
12742
|
+
for (int i0 = 0; i0 < ne0; i0++) {
|
12743
|
+
for (int ik1 = 0; ik1 < nk1; ik1++) {
|
12744
|
+
for (int ik0 = 0; ik0 < nk0; ik0++) {
|
12745
|
+
const int idx0 = i0*s0 + ik0*d0 - p0;
|
12746
|
+
const int idx1 = i1*s1 + ik1*d1 - p1;
|
13025
12747
|
|
13026
|
-
|
13027
|
-
for (int i0 = 0; i0 < ne0; i0++) {
|
13028
|
-
for (int ik1 = 0; ik1 < nk1; ik1++) {
|
13029
|
-
for (int ik0 = 0; ik0 < nk0; ik0++) {
|
12748
|
+
if (!(idx1 < 0 || idx1 >= ne11 || idx0 < 0 || idx0 >= ne10)) {
|
13030
12749
|
dst_data[(i1*ne0 + i0)*ew0 + i12*(nk0*nk1) + ik1*nk0 + ik0] =
|
13031
|
-
GGML_FP32_TO_FP16(src[
|
12750
|
+
GGML_FP32_TO_FP16(src[idx1*ne10 + idx0]);
|
13032
12751
|
}
|
13033
12752
|
}
|
13034
12753
|
}
|
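The rewritten source-data preparation above generalizes the old stride-equals-kernel case: for each output coordinate (i0, i1) and kernel tap (ik0, ik1) it computes the input position i*s + ik*d - p and copies the sample only when it lies inside the input, which is how stride, dilation and padding all enter the im2col buffer. A condensed sketch of that index mapping, with the same names as the hunk:

    const int idx0 = i0*s0 + ik0*d0 - p0;   // input column for this tap
    const int idx1 = i1*s1 + ik1*d1 - p1;   // input row for this tap

    if (idx0 >= 0 && idx0 < ne10 && idx1 >= 0 && idx1 < ne11) {
        dst_data[(i1*ne0 + i0)*ew0 + i12*(nk0*nk1) + ik1*nk0 + ik0] =
            GGML_FP32_TO_FP16(src[idx1*ne10 + idx0]);
    } // out-of-range taps keep the zero left by the earlier memset (implicit padding)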
@@ -13071,19 +12790,19 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
|
|
13071
12790
|
}
|
13072
12791
|
}
|
13073
12792
|
|
13074
|
-
static void
|
12793
|
+
static void ggml_compute_forward_conv_2d(
|
13075
12794
|
const struct ggml_compute_params * params,
|
13076
12795
|
const struct ggml_tensor * src0,
|
13077
12796
|
const struct ggml_tensor * src1,
|
13078
|
-
|
12797
|
+
struct ggml_tensor * dst) {
|
13079
12798
|
switch (src0->type) {
|
13080
12799
|
case GGML_TYPE_F16:
|
13081
12800
|
{
|
13082
|
-
|
12801
|
+
ggml_compute_forward_conv_2d_f16_f32(params, src0, src1, dst);
|
13083
12802
|
} break;
|
13084
12803
|
case GGML_TYPE_F32:
|
13085
12804
|
{
|
13086
|
-
//
|
12805
|
+
//ggml_compute_forward_conv_2d_f32(params, src0, src1, dst);
|
13087
12806
|
GGML_ASSERT(false);
|
13088
12807
|
} break;
|
13089
12808
|
default:
|
@@ -13093,32 +12812,6 @@ static void ggml_compute_forward_conv_2d_sk_p0(
|
|
13093
12812
|
}
|
13094
12813
|
}
|
13095
12814
|
|
13096
|
-
// ggml_compute_forward_conv_2d
|
13097
|
-
|
13098
|
-
static void ggml_compute_forward_conv_2d(
|
13099
|
-
const struct ggml_compute_params* params,
|
13100
|
-
const struct ggml_tensor* src0,
|
13101
|
-
const struct ggml_tensor* src1,
|
13102
|
-
const struct ggml_tensor* opt0,
|
13103
|
-
struct ggml_tensor* dst) {
|
13104
|
-
const int32_t s0 = ((const int32_t*)(opt0->data))[0];
|
13105
|
-
const int32_t s1 = ((const int32_t*)(opt0->data))[1];
|
13106
|
-
const int32_t p0 = ((const int32_t*)(opt0->data))[2];
|
13107
|
-
const int32_t p1 = ((const int32_t*)(opt0->data))[3];
|
13108
|
-
const int32_t d0 = ((const int32_t*)(opt0->data))[4];
|
13109
|
-
const int32_t d1 = ((const int32_t*)(opt0->data))[5];
|
13110
|
-
GGML_ASSERT(d0 == 1); // dilation not supported
|
13111
|
-
GGML_ASSERT(d1 == 1);
|
13112
|
-
GGML_ASSERT(p0 == 0); // padding not supported
|
13113
|
-
GGML_ASSERT(p1 == 0);
|
13114
|
-
|
13115
|
-
if (s0 == src0->ne[0] && s1 == src0->ne[1]) {
|
13116
|
-
ggml_compute_forward_conv_2d_sk_p0(params, src0, src1, dst);
|
13117
|
-
} else {
|
13118
|
-
GGML_ASSERT(false); // only stride equal to kernel size is supported
|
13119
|
-
}
|
13120
|
-
}
|
13121
|
-
|
13122
12815
|
// ggml_compute_forward_pool_1d_sk_p0
|
13123
12816
|
|
13124
12817
|
static void ggml_compute_forward_pool_1d_sk_p0(
|
@@ -13174,12 +12867,11 @@ static void ggml_compute_forward_pool_1d_sk_p0(
|
|
13174
12867
|
// ggml_compute_forward_pool_1d
|
13175
12868
|
|
13176
12869
|
static void ggml_compute_forward_pool_1d(
|
13177
|
-
|
13178
|
-
|
13179
|
-
|
13180
|
-
|
13181
|
-
|
13182
|
-
const int* opts = (const int*)opt0->data;
|
12870
|
+
const struct ggml_compute_params * params,
|
12871
|
+
const struct ggml_tensor * src0,
|
12872
|
+
struct ggml_tensor * dst) {
|
12873
|
+
|
12874
|
+
const int32_t* opts = (const int32_t*)dst->op_params;
|
13183
12875
|
enum ggml_op_pool op = opts[0];
|
13184
12876
|
const int k0 = opts[1];
|
13185
12877
|
const int s0 = opts[2];
|
@@ -13193,12 +12885,12 @@ static void ggml_compute_forward_pool_1d(
|
|
13193
12885
|
// ggml_compute_forward_pool_2d_sk_p0
|
13194
12886
|
|
13195
12887
|
static void ggml_compute_forward_pool_2d_sk_p0(
|
13196
|
-
|
13197
|
-
|
13198
|
-
|
13199
|
-
|
13200
|
-
|
13201
|
-
|
12888
|
+
const struct ggml_compute_params * params,
|
12889
|
+
const enum ggml_op_pool op,
|
12890
|
+
const struct ggml_tensor * src,
|
12891
|
+
const int k0,
|
12892
|
+
const int k1,
|
12893
|
+
struct ggml_tensor * dst) {
|
13202
12894
|
assert(src->type == GGML_TYPE_F32);
|
13203
12895
|
assert(params->ith == 0);
|
13204
12896
|
|
@@ -13258,12 +12950,11 @@ static void ggml_compute_forward_pool_2d_sk_p0(
|
|
13258
12950
|
// ggml_compute_forward_pool_2d
|
13259
12951
|
|
13260
12952
|
static void ggml_compute_forward_pool_2d(
|
13261
|
-
|
13262
|
-
|
13263
|
-
|
13264
|
-
|
13265
|
-
|
13266
|
-
const int* opts = (const int*)opt0->data;
|
12953
|
+
const struct ggml_compute_params * params,
|
12954
|
+
const struct ggml_tensor * src0,
|
12955
|
+
struct ggml_tensor * dst) {
|
12956
|
+
|
12957
|
+
const int32_t * opts = (const int32_t *)dst->op_params;
|
13267
12958
|
enum ggml_op_pool op = opts[0];
|
13268
12959
|
const int k0 = opts[1];
|
13269
12960
|
const int k1 = opts[2];
|
@@ -13288,7 +12979,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
13288
12979
|
const struct ggml_tensor * k,
|
13289
12980
|
const struct ggml_tensor * v,
|
13290
12981
|
const bool masked,
|
13291
|
-
|
12982
|
+
struct ggml_tensor * dst) {
|
13292
12983
|
int64_t t0 = ggml_perf_time_us();
|
13293
12984
|
UNUSED(t0);
|
13294
12985
|
|
@@ -13466,7 +13157,7 @@ static void ggml_compute_forward_flash_attn_f16(
|
|
13466
13157
|
const struct ggml_tensor * k,
|
13467
13158
|
const struct ggml_tensor * v,
|
13468
13159
|
const bool masked,
|
13469
|
-
|
13160
|
+
struct ggml_tensor * dst) {
|
13470
13161
|
int64_t t0 = ggml_perf_time_us();
|
13471
13162
|
UNUSED(t0);
|
13472
13163
|
|
@@ -14231,7 +13922,6 @@ static void ggml_compute_forward_flash_attn_back(
|
|
14231
13922
|
static void ggml_compute_forward_win_part_f32(
|
14232
13923
|
const struct ggml_compute_params * params,
|
14233
13924
|
const struct ggml_tensor * src0,
|
14234
|
-
const struct ggml_tensor * opt0,
|
14235
13925
|
struct ggml_tensor * dst) {
|
14236
13926
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14237
13927
|
return;
|
@@ -14240,9 +13930,9 @@ static void ggml_compute_forward_win_part_f32(
|
|
14240
13930
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14241
13931
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14242
13932
|
|
14243
|
-
const int32_t nep0 = ((const int32_t *)(
|
14244
|
-
const int32_t nep1 = ((const int32_t *)(
|
14245
|
-
const int32_t w = ((const int32_t *)(
|
13933
|
+
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
|
13934
|
+
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
|
13935
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[2];
|
14246
13936
|
|
14247
13937
|
assert(ne00 == ne0);
|
14248
13938
|
assert(ne3 == nep0*nep1);
|
@@ -14276,12 +13966,11 @@ static void ggml_compute_forward_win_part_f32(
|
|
14276
13966
|
static void ggml_compute_forward_win_part(
|
14277
13967
|
const struct ggml_compute_params * params,
|
14278
13968
|
const struct ggml_tensor * src0,
|
14279
|
-
const struct ggml_tensor * opt0,
|
14280
13969
|
struct ggml_tensor * dst) {
|
14281
13970
|
switch (src0->type) {
|
14282
13971
|
case GGML_TYPE_F32:
|
14283
13972
|
{
|
14284
|
-
ggml_compute_forward_win_part_f32(params, src0,
|
13973
|
+
ggml_compute_forward_win_part_f32(params, src0, dst);
|
14285
13974
|
} break;
|
14286
13975
|
default:
|
14287
13976
|
{
|
@@ -14295,7 +13984,6 @@ static void ggml_compute_forward_win_part(
|
|
14295
13984
|
static void ggml_compute_forward_win_unpart_f32(
|
14296
13985
|
const struct ggml_compute_params * params,
|
14297
13986
|
const struct ggml_tensor * src0,
|
14298
|
-
const struct ggml_tensor * opt0,
|
14299
13987
|
struct ggml_tensor * dst) {
|
14300
13988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
14301
13989
|
return;
|
@@ -14304,7 +13992,7 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14304
13992
|
GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
|
14305
13993
|
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
14306
13994
|
|
14307
|
-
const int32_t w = ((const int32_t *)(
|
13995
|
+
const int32_t w = ((const int32_t *)(dst->op_params))[0];
|
14308
13996
|
|
14309
13997
|
// padding
|
14310
13998
|
const int px = (w - ne1%w)%w;
|
@@ -14338,12 +14026,67 @@ static void ggml_compute_forward_win_unpart_f32(
|
|
14338
14026
|
static void ggml_compute_forward_win_unpart(
|
14339
14027
|
const struct ggml_compute_params * params,
|
14340
14028
|
const struct ggml_tensor * src0,
|
14341
|
-
const struct ggml_tensor * opt0,
|
14342
14029
|
struct ggml_tensor * dst) {
|
14343
14030
|
switch (src0->type) {
|
14344
14031
|
case GGML_TYPE_F32:
|
14345
14032
|
{
|
14346
|
-
ggml_compute_forward_win_unpart_f32(params, src0,
|
14033
|
+
ggml_compute_forward_win_unpart_f32(params, src0, dst);
|
14034
|
+
} break;
|
14035
|
+
default:
|
14036
|
+
{
|
14037
|
+
GGML_ASSERT(false);
|
14038
|
+
} break;
|
14039
|
+
}
|
14040
|
+
}
|
14041
|
+
|
14042
|
+
//gmml_compute_forward_unary
|
14043
|
+
|
14044
|
+
static void ggml_compute_forward_unary(
|
14045
|
+
const struct ggml_compute_params * params,
|
14046
|
+
const struct ggml_tensor * src0,
|
14047
|
+
struct ggml_tensor * dst) {
|
14048
|
+
const enum ggml_unary_op op = ggml_get_unary_op(dst);
|
14049
|
+
|
14050
|
+
switch (op) {
|
14051
|
+
case GGML_UNARY_OP_ABS:
|
14052
|
+
{
|
14053
|
+
ggml_compute_forward_abs(params, src0, dst);
|
14054
|
+
} break;
|
14055
|
+
case GGML_UNARY_OP_SGN:
|
14056
|
+
{
|
14057
|
+
ggml_compute_forward_sgn(params, src0, dst);
|
14058
|
+
} break;
|
14059
|
+
case GGML_UNARY_OP_NEG:
|
14060
|
+
{
|
14061
|
+
ggml_compute_forward_neg(params, src0, dst);
|
14062
|
+
} break;
|
14063
|
+
case GGML_UNARY_OP_STEP:
|
14064
|
+
{
|
14065
|
+
ggml_compute_forward_step(params, src0, dst);
|
14066
|
+
} break;
|
14067
|
+
case GGML_UNARY_OP_TANH:
|
14068
|
+
{
|
14069
|
+
ggml_compute_forward_tanh(params, src0, dst);
|
14070
|
+
} break;
|
14071
|
+
case GGML_UNARY_OP_ELU:
|
14072
|
+
{
|
14073
|
+
ggml_compute_forward_elu(params, src0, dst);
|
14074
|
+
} break;
|
14075
|
+
case GGML_UNARY_OP_RELU:
|
14076
|
+
{
|
14077
|
+
ggml_compute_forward_relu(params, src0, dst);
|
14078
|
+
} break;
|
14079
|
+
case GGML_UNARY_OP_GELU:
|
14080
|
+
{
|
14081
|
+
ggml_compute_forward_gelu(params, src0, dst);
|
14082
|
+
} break;
|
14083
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
14084
|
+
{
|
14085
|
+
ggml_compute_forward_gelu_quick(params, src0, dst);
|
14086
|
+
} break;
|
14087
|
+
case GGML_UNARY_OP_SILU:
|
14088
|
+
{
|
14089
|
+
ggml_compute_forward_silu(params, src0, dst);
|
14347
14090
|
} break;
|
14348
14091
|
default:
|
14349
14092
|
{
|
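The hunk above folds the former per-operator compute cases (ABS, SGN, NEG, STEP, TANH, ELU, RELU, GELU, GELU_QUICK, SILU) into a single ggml_compute_forward_unary dispatcher that reads the concrete operator back out of the destination tensor's op_params via ggml_get_unary_op(dst). A minimal standalone sketch of that store-an-enum-in-op-params pattern follows; the names and buffer size here are illustrative, not ggml's own.

#include <stdint.h>
#include <stdio.h>

/* illustrative stand-ins for ggml's per-node op_params storage */
enum unary_op { OP_ABS, OP_NEG, OP_RELU };

struct node {
    int32_t op_params[8];   /* small fixed buffer carried by every node */
};

static void set_unary_op(struct node * n, enum unary_op op) {
    n->op_params[0] = (int32_t) op;          /* store the operator id */
}

static enum unary_op get_unary_op(const struct node * n) {
    return (enum unary_op) n->op_params[0];  /* read it back at compute time */
}

static float apply(const struct node * n, float x) {
    switch (get_unary_op(n)) {               /* single dispatch point */
        case OP_ABS:  return x < 0 ? -x : x;
        case OP_NEG:  return -x;
        case OP_RELU: return x > 0 ? x : 0;
    }
    return x;
}

int main(void) {
    struct node n = {0};
    set_unary_op(&n, OP_RELU);
    printf("%f\n", apply(&n, -3.0f));        /* prints 0.000000 */
    return 0;
}
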
@@ -14862,7 +14605,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14862
14605
|
} break;
|
14863
14606
|
case GGML_OP_ACC:
|
14864
14607
|
{
|
14865
|
-
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor
|
14608
|
+
ggml_compute_forward_acc(params, tensor->src[0], tensor->src[1], tensor);
|
14866
14609
|
} break;
|
14867
14610
|
case GGML_OP_SUB:
|
14868
14611
|
{
|
@@ -14912,46 +14655,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14912
14655
|
{
|
14913
14656
|
ggml_compute_forward_repeat_back(params, tensor->src[0], tensor);
|
14914
14657
|
} break;
|
14915
|
-
case GGML_OP_ABS:
|
14916
|
-
{
|
14917
|
-
ggml_compute_forward_abs(params, tensor->src[0], tensor);
|
14918
|
-
} break;
|
14919
|
-
case GGML_OP_SGN:
|
14920
|
-
{
|
14921
|
-
ggml_compute_forward_sgn(params, tensor->src[0], tensor);
|
14922
|
-
} break;
|
14923
|
-
case GGML_OP_NEG:
|
14924
|
-
{
|
14925
|
-
ggml_compute_forward_neg(params, tensor->src[0], tensor);
|
14926
|
-
} break;
|
14927
|
-
case GGML_OP_STEP:
|
14928
|
-
{
|
14929
|
-
ggml_compute_forward_step(params, tensor->src[0], tensor);
|
14930
|
-
} break;
|
14931
|
-
case GGML_OP_TANH:
|
14932
|
-
{
|
14933
|
-
ggml_compute_forward_tanh(params, tensor->src[0], tensor);
|
14934
|
-
} break;
|
14935
|
-
case GGML_OP_ELU:
|
14936
|
-
{
|
14937
|
-
ggml_compute_forward_elu(params, tensor->src[0], tensor);
|
14938
|
-
} break;
|
14939
|
-
case GGML_OP_RELU:
|
14940
|
-
{
|
14941
|
-
ggml_compute_forward_relu(params, tensor->src[0], tensor);
|
14942
|
-
} break;
|
14943
|
-
case GGML_OP_GELU:
|
14944
|
-
{
|
14945
|
-
ggml_compute_forward_gelu(params, tensor->src[0], tensor);
|
14946
|
-
} break;
|
14947
|
-
case GGML_OP_GELU_QUICK:
|
14948
|
-
{
|
14949
|
-
ggml_compute_forward_gelu_quick(params, tensor->src[0], tensor);
|
14950
|
-
} break;
|
14951
|
-
case GGML_OP_SILU:
|
14952
|
-
{
|
14953
|
-
ggml_compute_forward_silu(params, tensor->src[0], tensor);
|
14954
|
-
} break;
|
14955
14658
|
case GGML_OP_SILU_BACK:
|
14956
14659
|
{
|
14957
14660
|
ggml_compute_forward_silu_back(params, tensor->src[0], tensor->src[1], tensor);
|
@@ -14982,7 +14685,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
14982
14685
|
} break;
|
14983
14686
|
case GGML_OP_SET:
|
14984
14687
|
{
|
14985
|
-
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor
|
14688
|
+
ggml_compute_forward_set(params, tensor->src[0], tensor->src[1], tensor);
|
14986
14689
|
} break;
|
14987
14690
|
case GGML_OP_CPY:
|
14988
14691
|
{
|
@@ -15022,11 +14725,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15022
14725
|
} break;
|
15023
14726
|
case GGML_OP_DIAG_MASK_INF:
|
15024
14727
|
{
|
15025
|
-
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor
|
14728
|
+
ggml_compute_forward_diag_mask_inf(params, tensor->src[0], tensor);
|
15026
14729
|
} break;
|
15027
14730
|
case GGML_OP_DIAG_MASK_ZERO:
|
15028
14731
|
{
|
15029
|
-
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor
|
14732
|
+
ggml_compute_forward_diag_mask_zero(params, tensor->src[0], tensor);
|
15030
14733
|
} break;
|
15031
14734
|
case GGML_OP_SOFT_MAX:
|
15032
14735
|
{
|
@@ -15038,39 +14741,39 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15038
14741
|
} break;
|
15039
14742
|
case GGML_OP_ROPE:
|
15040
14743
|
{
|
15041
|
-
ggml_compute_forward_rope(params, tensor->src[0], tensor
|
14744
|
+
ggml_compute_forward_rope(params, tensor->src[0], tensor);
|
15042
14745
|
} break;
|
15043
14746
|
case GGML_OP_ROPE_BACK:
|
15044
14747
|
{
|
15045
|
-
ggml_compute_forward_rope_back(params, tensor->src[0], tensor
|
14748
|
+
ggml_compute_forward_rope_back(params, tensor->src[0], tensor);
|
15046
14749
|
} break;
|
15047
14750
|
case GGML_OP_ALIBI:
|
15048
14751
|
{
|
15049
|
-
ggml_compute_forward_alibi(params, tensor->src[0], tensor
|
14752
|
+
ggml_compute_forward_alibi(params, tensor->src[0], tensor);
|
15050
14753
|
} break;
|
15051
14754
|
case GGML_OP_CLAMP:
|
15052
14755
|
{
|
15053
|
-
ggml_compute_forward_clamp(params, tensor->src[0], tensor
|
14756
|
+
ggml_compute_forward_clamp(params, tensor->src[0], tensor);
|
15054
14757
|
} break;
|
15055
14758
|
case GGML_OP_CONV_1D:
|
15056
14759
|
{
|
15057
|
-
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor
|
14760
|
+
ggml_compute_forward_conv_1d(params, tensor->src[0], tensor->src[1], tensor);
|
15058
14761
|
} break;
|
15059
14762
|
case GGML_OP_CONV_2D:
|
15060
14763
|
{
|
15061
|
-
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor
|
14764
|
+
ggml_compute_forward_conv_2d(params, tensor->src[0], tensor->src[1], tensor);
|
15062
14765
|
} break;
|
15063
14766
|
case GGML_OP_POOL_1D:
|
15064
14767
|
{
|
15065
|
-
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor
|
14768
|
+
ggml_compute_forward_pool_1d(params, tensor->src[0], tensor);
|
15066
14769
|
} break;
|
15067
14770
|
case GGML_OP_POOL_2D:
|
15068
14771
|
{
|
15069
|
-
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor
|
14772
|
+
ggml_compute_forward_pool_2d(params, tensor->src[0], tensor);
|
15070
14773
|
} break;
|
15071
14774
|
case GGML_OP_FLASH_ATTN:
|
15072
14775
|
{
|
15073
|
-
const int32_t t =
|
14776
|
+
const int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15074
14777
|
GGML_ASSERT(t == 0 || t == 1);
|
15075
14778
|
const bool masked = t != 0;
|
15076
14779
|
ggml_compute_forward_flash_attn(params, tensor->src[0], tensor->src[1], tensor->src[2], masked, tensor);
|
@@ -15081,47 +14784,56 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|
15081
14784
|
} break;
|
15082
14785
|
case GGML_OP_FLASH_ATTN_BACK:
|
15083
14786
|
{
|
15084
|
-
int32_t t =
|
14787
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15085
14788
|
GGML_ASSERT(t == 0 || t == 1);
|
15086
14789
|
bool masked = t != 0;
|
15087
14790
|
ggml_compute_forward_flash_attn_back(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor->src[3], masked, tensor);
|
15088
14791
|
} break;
|
15089
14792
|
case GGML_OP_WIN_PART:
|
15090
14793
|
{
|
15091
|
-
ggml_compute_forward_win_part(params, tensor->src[0], tensor
|
14794
|
+
ggml_compute_forward_win_part(params, tensor->src[0], tensor);
|
15092
14795
|
} break;
|
15093
14796
|
case GGML_OP_WIN_UNPART:
|
15094
14797
|
{
|
15095
|
-
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor
|
14798
|
+
ggml_compute_forward_win_unpart(params, tensor->src[0], tensor);
|
14799
|
+
} break;
|
14800
|
+
case GGML_OP_UNARY:
|
14801
|
+
{
|
14802
|
+
ggml_compute_forward_unary(params, tensor->src[0], tensor);
|
15096
14803
|
} break;
|
15097
14804
|
case GGML_OP_MAP_UNARY:
|
15098
14805
|
{
|
15099
|
-
|
14806
|
+
ggml_unary_op_f32_t fun;
|
14807
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15100
14808
|
ggml_compute_forward_map_unary(params, tensor->src[0], tensor, fun);
|
15101
14809
|
}
|
15102
14810
|
break;
|
15103
14811
|
case GGML_OP_MAP_BINARY:
|
15104
14812
|
{
|
15105
|
-
|
14813
|
+
ggml_binary_op_f32_t fun;
|
14814
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15106
14815
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15107
14816
|
}
|
15108
14817
|
break;
|
15109
14818
|
case GGML_OP_MAP_CUSTOM1:
|
15110
14819
|
{
|
15111
|
-
|
14820
|
+
ggml_custom1_op_f32_t fun;
|
14821
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15112
14822
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
15113
14823
|
}
|
15114
14824
|
break;
|
15115
14825
|
case GGML_OP_MAP_CUSTOM2:
|
15116
14826
|
{
|
15117
|
-
|
14827
|
+
ggml_custom2_op_f32_t fun;
|
14828
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
15118
14829
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
15119
14830
|
}
|
15120
14831
|
break;
|
15121
14832
|
case GGML_OP_MAP_CUSTOM3:
|
15122
14833
|
{
|
15123
|
-
|
15124
|
-
|
14834
|
+
ggml_custom3_op_f32_t fun;
|
14835
|
+
memcpy(&fun, tensor->op_params, sizeof(fun));
|
14836
|
+
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
15125
14837
|
}
|
15126
14838
|
break;
|
15127
14839
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
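In the hunk above, FLASH_ATTN reads its masked flag with ggml_get_op_params_i32, and the MAP_UNARY/MAP_BINARY/MAP_CUSTOM ops recover their callback by memcpy-ing it out of op_params instead of fetching it from an extra source tensor. A small self-contained sketch of round-tripping a function pointer through such an opaque parameter buffer; all names here are illustrative.

#include <stdio.h>
#include <string.h>

typedef float (*unary_f32_t)(float);

static float square(float x) { return x * x; }

int main(void) {
    unsigned char op_params[32] = {0};   /* opaque per-node parameter storage */
    unary_f32_t fun = square;

    /* store the pointer as raw bytes ... */
    memcpy(op_params, &fun, sizeof(fun));

    /* ... and recover it later, the same way the compute path does */
    unary_f32_t loaded = NULL;
    memcpy(&loaded, op_params, sizeof(loaded));

    printf("%f\n", loaded(3.0f));        /* prints 9.000000 */
    return 0;
}
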
@@ -15185,12 +14897,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15185
14897
|
src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
|
15186
14898
|
}
|
15187
14899
|
if (src1->grad) {
|
15188
|
-
|
15189
|
-
|
15190
|
-
const size_t
|
15191
|
-
const size_t
|
15192
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15193
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
14900
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
14901
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
14902
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
14903
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15194
14904
|
|
15195
14905
|
struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
|
15196
14906
|
tensor->grad,
|
@@ -15339,73 +15049,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15339
15049
|
inplace);
|
15340
15050
|
}
|
15341
15051
|
} break;
|
15342
|
-
case GGML_OP_ABS:
|
15343
|
-
{
|
15344
|
-
if (src0->grad) {
|
15345
|
-
src0->grad =
|
15346
|
-
ggml_add_impl(ctx,
|
15347
|
-
src0->grad,
|
15348
|
-
ggml_mul(ctx,
|
15349
|
-
ggml_sgn(ctx, src0),
|
15350
|
-
tensor->grad),
|
15351
|
-
inplace);
|
15352
|
-
}
|
15353
|
-
} break;
|
15354
|
-
case GGML_OP_SGN:
|
15355
|
-
{
|
15356
|
-
if (src0->grad) {
|
15357
|
-
// noop
|
15358
|
-
}
|
15359
|
-
} break;
|
15360
|
-
case GGML_OP_NEG:
|
15361
|
-
{
|
15362
|
-
if (src0->grad) {
|
15363
|
-
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15364
|
-
}
|
15365
|
-
} break;
|
15366
|
-
case GGML_OP_STEP:
|
15367
|
-
{
|
15368
|
-
if (src0->grad) {
|
15369
|
-
// noop
|
15370
|
-
}
|
15371
|
-
} break;
|
15372
|
-
case GGML_OP_TANH:
|
15373
|
-
{
|
15374
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15375
|
-
} break;
|
15376
|
-
case GGML_OP_ELU:
|
15377
|
-
{
|
15378
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15379
|
-
} break;
|
15380
|
-
case GGML_OP_RELU:
|
15381
|
-
{
|
15382
|
-
if (src0->grad) {
|
15383
|
-
src0->grad = ggml_sub_impl(ctx,
|
15384
|
-
src0->grad,
|
15385
|
-
ggml_mul(ctx,
|
15386
|
-
ggml_step(ctx, src0),
|
15387
|
-
tensor->grad),
|
15388
|
-
inplace);
|
15389
|
-
}
|
15390
|
-
} break;
|
15391
|
-
case GGML_OP_GELU:
|
15392
|
-
{
|
15393
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15394
|
-
} break;
|
15395
|
-
case GGML_OP_GELU_QUICK:
|
15396
|
-
{
|
15397
|
-
GGML_ASSERT(false); // TODO: not implemented
|
15398
|
-
} break;
|
15399
|
-
case GGML_OP_SILU:
|
15400
|
-
{
|
15401
|
-
// necessary for llama
|
15402
|
-
if (src0->grad) {
|
15403
|
-
src0->grad = ggml_add_impl(ctx,
|
15404
|
-
src0->grad,
|
15405
|
-
ggml_silu_back(ctx, src0, tensor->grad),
|
15406
|
-
inplace);
|
15407
|
-
}
|
15408
|
-
} break;
|
15409
15052
|
case GGML_OP_SILU_BACK:
|
15410
15053
|
{
|
15411
15054
|
GGML_ASSERT(false); // TODO: not implemented
|
@@ -15498,12 +15141,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15498
15141
|
} break;
|
15499
15142
|
case GGML_OP_SET:
|
15500
15143
|
{
|
15501
|
-
|
15502
|
-
|
15503
|
-
const size_t
|
15504
|
-
const size_t
|
15505
|
-
const size_t nb3 = (( int32_t * ) tensor->src[2]->data)[2];
|
15506
|
-
const size_t offset = (( int32_t * ) tensor->src[2]->data)[3];
|
15144
|
+
const size_t nb1 = ((int32_t *) tensor->op_params)[0];
|
15145
|
+
const size_t nb2 = ((int32_t *) tensor->op_params)[1];
|
15146
|
+
const size_t nb3 = ((int32_t *) tensor->op_params)[2];
|
15147
|
+
const size_t offset = ((int32_t *) tensor->op_params)[3];
|
15507
15148
|
|
15508
15149
|
struct ggml_tensor * tensor_grad_view = NULL;
|
15509
15150
|
|
@@ -15580,8 +15221,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15580
15221
|
if (src0->grad) {
|
15581
15222
|
size_t offset;
|
15582
15223
|
|
15583
|
-
|
15584
|
-
memcpy(&offset, tensor->src[2]->data, sizeof(offset));
|
15224
|
+
memcpy(&offset, tensor->op_params, sizeof(offset));
|
15585
15225
|
|
15586
15226
|
size_t nb1 = tensor->nb[1];
|
15587
15227
|
size_t nb2 = tensor->nb[2];
|
@@ -15608,7 +15248,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15608
15248
|
{
|
15609
15249
|
// necessary for llama
|
15610
15250
|
if (src0->grad) {
|
15611
|
-
int32_t * axes = (int32_t *) tensor->
|
15251
|
+
int32_t * axes = (int32_t *) tensor->op_params;
|
15612
15252
|
int axis0 = axes[0] & 0x3;
|
15613
15253
|
int axis1 = axes[1] & 0x3;
|
15614
15254
|
int axis2 = axes[2] & 0x3;
|
@@ -15664,33 +15304,23 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15664
15304
|
{
|
15665
15305
|
// necessary for llama
|
15666
15306
|
if (src0->grad) {
|
15667
|
-
|
15668
|
-
assert(ggml_nelements(src1) == 2);
|
15669
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15307
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15670
15308
|
src0->grad =
|
15671
15309
|
ggml_add_impl(ctx, src0->grad,
|
15672
15310
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15673
15311
|
inplace);
|
15674
15312
|
}
|
15675
|
-
if (src1->grad) {
|
15676
|
-
// noop
|
15677
|
-
}
|
15678
15313
|
} break;
|
15679
15314
|
case GGML_OP_DIAG_MASK_ZERO:
|
15680
15315
|
{
|
15681
15316
|
// necessary for llama
|
15682
15317
|
if (src0->grad) {
|
15683
|
-
|
15684
|
-
assert(ggml_nelements(src1) == 2);
|
15685
|
-
const int n_past = ((int32_t *) src1->data)[0];
|
15318
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15686
15319
|
src0->grad =
|
15687
15320
|
ggml_add_impl(ctx, src0->grad,
|
15688
15321
|
ggml_diag_mask_zero_impl(ctx, tensor->grad, n_past, false),
|
15689
15322
|
inplace);
|
15690
15323
|
}
|
15691
|
-
if (src1->grad) {
|
15692
|
-
// noop
|
15693
|
-
}
|
15694
15324
|
} break;
|
15695
15325
|
case GGML_OP_SOFT_MAX:
|
15696
15326
|
{
|
@@ -15711,33 +15341,28 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15711
15341
|
{
|
15712
15342
|
// necessary for llama
|
15713
15343
|
if (src0->grad) {
|
15714
|
-
|
15715
|
-
|
15716
|
-
const int
|
15717
|
-
const int
|
15718
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15344
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15345
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15346
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15347
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15719
15348
|
src0->grad = ggml_add_impl(ctx,
|
15720
15349
|
src0->grad,
|
15721
15350
|
ggml_rope_back(ctx,
|
15722
15351
|
tensor->grad,
|
15723
15352
|
n_past,
|
15724
15353
|
n_dims,
|
15725
|
-
mode
|
15354
|
+
mode,
|
15355
|
+
n_ctx),
|
15726
15356
|
inplace);
|
15727
15357
|
}
|
15728
|
-
if (src1->grad) {
|
15729
|
-
// noop
|
15730
|
-
}
|
15731
15358
|
} break;
|
15732
15359
|
case GGML_OP_ROPE_BACK:
|
15733
15360
|
{
|
15734
15361
|
if (src0->grad) {
|
15735
|
-
|
15736
|
-
|
15737
|
-
const int
|
15738
|
-
const int
|
15739
|
-
const int mode = ((int32_t *) src1->data)[2];
|
15740
|
-
const int n_ctx = ((int32_t *) src1->data)[3];
|
15362
|
+
const int n_past = ((int32_t *) tensor->op_params)[0];
|
15363
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15364
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15365
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15741
15366
|
src0->grad = ggml_add_impl(ctx,
|
15742
15367
|
src0->grad,
|
15743
15368
|
ggml_rope(ctx,
|
@@ -15748,9 +15373,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15748
15373
|
n_ctx),
|
15749
15374
|
inplace);
|
15750
15375
|
}
|
15751
|
-
if (src1->grad) {
|
15752
|
-
// noop
|
15753
|
-
}
|
15754
15376
|
} break;
|
15755
15377
|
case GGML_OP_ALIBI:
|
15756
15378
|
{
|
@@ -15780,7 +15402,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15780
15402
|
{
|
15781
15403
|
struct ggml_tensor * flash_grad = NULL;
|
15782
15404
|
if (src0->grad || src1->grad || tensor->src[2]->grad) {
|
15783
|
-
int32_t t =
|
15405
|
+
int32_t t = ggml_get_op_params_i32(tensor, 0);
|
15784
15406
|
GGML_ASSERT(t == 0 || t == 1);
|
15785
15407
|
bool masked = t != 0;
|
15786
15408
|
flash_grad =
|
@@ -15943,6 +15565,80 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15943
15565
|
} break;
|
15944
15566
|
case GGML_OP_WIN_PART:
|
15945
15567
|
case GGML_OP_WIN_UNPART:
|
15568
|
+
case GGML_OP_UNARY:
|
15569
|
+
{
|
15570
|
+
switch (ggml_get_unary_op(tensor)) {
|
15571
|
+
case GGML_UNARY_OP_ABS:
|
15572
|
+
{
|
15573
|
+
if (src0->grad) {
|
15574
|
+
src0->grad =
|
15575
|
+
ggml_add_impl(ctx,
|
15576
|
+
src0->grad,
|
15577
|
+
ggml_mul(ctx,
|
15578
|
+
ggml_sgn(ctx, src0),
|
15579
|
+
tensor->grad),
|
15580
|
+
inplace);
|
15581
|
+
}
|
15582
|
+
} break;
|
15583
|
+
case GGML_UNARY_OP_SGN:
|
15584
|
+
{
|
15585
|
+
if (src0->grad) {
|
15586
|
+
// noop
|
15587
|
+
}
|
15588
|
+
} break;
|
15589
|
+
case GGML_UNARY_OP_NEG:
|
15590
|
+
{
|
15591
|
+
if (src0->grad) {
|
15592
|
+
src0->grad = ggml_sub_impl(ctx, src0->grad, tensor->grad, inplace);
|
15593
|
+
}
|
15594
|
+
} break;
|
15595
|
+
case GGML_UNARY_OP_STEP:
|
15596
|
+
{
|
15597
|
+
if (src0->grad) {
|
15598
|
+
// noop
|
15599
|
+
}
|
15600
|
+
} break;
|
15601
|
+
case GGML_UNARY_OP_TANH:
|
15602
|
+
{
|
15603
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15604
|
+
} break;
|
15605
|
+
case GGML_UNARY_OP_ELU:
|
15606
|
+
{
|
15607
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15608
|
+
} break;
|
15609
|
+
case GGML_UNARY_OP_RELU:
|
15610
|
+
{
|
15611
|
+
if (src0->grad) {
|
15612
|
+
src0->grad = ggml_add_impl(ctx,
|
15613
|
+
src0->grad,
|
15614
|
+
ggml_mul(ctx,
|
15615
|
+
ggml_step(ctx, src0),
|
15616
|
+
tensor->grad),
|
15617
|
+
inplace);
|
15618
|
+
}
|
15619
|
+
} break;
|
15620
|
+
case GGML_UNARY_OP_GELU:
|
15621
|
+
{
|
15622
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15623
|
+
} break;
|
15624
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
15625
|
+
{
|
15626
|
+
GGML_ASSERT(false); // TODO: not implemented
|
15627
|
+
} break;
|
15628
|
+
case GGML_UNARY_OP_SILU:
|
15629
|
+
{
|
15630
|
+
// necessary for llama
|
15631
|
+
if (src0->grad) {
|
15632
|
+
src0->grad = ggml_add_impl(ctx,
|
15633
|
+
src0->grad,
|
15634
|
+
ggml_silu_back(ctx, src0, tensor->grad),
|
15635
|
+
inplace);
|
15636
|
+
}
|
15637
|
+
} break;
|
15638
|
+
default:
|
15639
|
+
GGML_ASSERT(false);
|
15640
|
+
}
|
15641
|
+
} break;
|
15946
15642
|
case GGML_OP_MAP_UNARY:
|
15947
15643
|
case GGML_OP_MAP_BINARY:
|
15948
15644
|
case GGML_OP_MAP_CUSTOM1:
|
@@ -15978,6 +15674,34 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15978
15674
|
}
|
15979
15675
|
}
|
15980
15676
|
|
15677
|
+
static_assert(GGML_GRAPH_HASHTABLE_SIZE > GGML_MAX_NODES * 2, "GGML_GRAPH_HT_SIZE is too small");
|
15678
|
+
|
15679
|
+
static size_t hash(void * p) {
|
15680
|
+
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
15681
|
+
}
|
15682
|
+
|
15683
|
+
static bool hash_insert(void * hash_table[], void * p) {
|
15684
|
+
size_t h = hash(p);
|
15685
|
+
|
15686
|
+
// linear probing
|
15687
|
+
size_t i = h;
|
15688
|
+
while (hash_table[i] != NULL && hash_table[i] != p) {
|
15689
|
+
i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
|
15690
|
+
if (i == h) {
|
15691
|
+
// hash table is full
|
15692
|
+
GGML_ASSERT(false);
|
15693
|
+
}
|
15694
|
+
}
|
15695
|
+
|
15696
|
+
if (hash_table[i] == p) {
|
15697
|
+
return true;
|
15698
|
+
}
|
15699
|
+
|
15700
|
+
// insert
|
15701
|
+
hash_table[i] = p;
|
15702
|
+
return false;
|
15703
|
+
}
|
15704
|
+
|
15981
15705
|
static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
|
15982
15706
|
if (node->grad == NULL) {
|
15983
15707
|
// this usually happens when we generate intermediate nodes from constants in the backward pass
|
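The new visited_hash_table and hash_insert helper above give ggml_visit_parents an open-addressing visited set in place of the linear scans over the node and leaf arrays removed in the next hunk. A compact standalone sketch of the same insert-or-find behaviour, using a made-up table size in place of GGML_GRAPH_HASHTABLE_SIZE:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define TABLE_SIZE 17   /* stand-in for GGML_GRAPH_HASHTABLE_SIZE */

static size_t hash_ptr(void * p) {
    return (size_t) p % TABLE_SIZE;
}

/* returns 1 if p was already present, 0 if it was inserted now */
static int hash_insert_demo(void * table[], void * p) {
    size_t h = hash_ptr(p);
    size_t i = h;
    while (table[i] != NULL && table[i] != p) {
        i = (i + 1) % TABLE_SIZE;   /* linear probing */
        assert(i != h);             /* table full */
    }
    if (table[i] == p) {
        return 1;                   /* already visited */
    }
    table[i] = p;                   /* first visit: insert */
    return 0;
}

int main(void) {
    void * table[TABLE_SIZE] = {0};
    int x, y;
    printf("%d %d %d\n", hash_insert_demo(table, &x),
                         hash_insert_demo(table, &y),
                         hash_insert_demo(table, &x));  /* prints 0 0 1 */
    return 0;
}
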
@@ -15988,16 +15712,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
|
|
15988
15712
|
}
|
15989
15713
|
|
15990
15714
|
// check if already visited
|
15991
|
-
|
15992
|
-
|
15993
|
-
return;
|
15994
|
-
}
|
15995
|
-
}
|
15996
|
-
|
15997
|
-
for (int i = 0; i < cgraph->n_leafs; i++) {
|
15998
|
-
if (cgraph->leafs[i] == node) {
|
15999
|
-
return;
|
16000
|
-
}
|
15715
|
+
if (hash_insert(cgraph->visited_hash_table, node)) {
|
15716
|
+
return;
|
16001
15717
|
}
|
16002
15718
|
|
16003
15719
|
for (int i = 0; i < GGML_MAX_SRC; ++i) {
|
@@ -16060,6 +15776,7 @@ struct ggml_cgraph ggml_build_forward(struct ggml_tensor * tensor) {
|
|
16060
15776
|
/*.nodes =*/ { NULL },
|
16061
15777
|
/*.grads =*/ { NULL },
|
16062
15778
|
/*.leafs =*/ { NULL },
|
15779
|
+
/*.hash_table =*/ { NULL },
|
16063
15780
|
/*.perf_runs =*/ 0,
|
16064
15781
|
/*.perf_cycles =*/ 0,
|
16065
15782
|
/*.perf_time_us =*/ 0,
|
@@ -16101,13 +15818,42 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
|
|
16101
15818
|
|
16102
15819
|
if (node->is_param) {
|
16103
15820
|
GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
|
16104
|
-
|
15821
|
+
ggml_build_forward_expand(&result, node->grad);
|
16105
15822
|
}
|
16106
15823
|
}
|
16107
15824
|
|
16108
15825
|
return result;
|
16109
15826
|
}
|
16110
15827
|
|
15828
|
+
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
|
15829
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
|
15830
|
+
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
|
15831
|
+
|
15832
|
+
*cgraph = (struct ggml_cgraph) {
|
15833
|
+
/*.n_nodes =*/ 0,
|
15834
|
+
/*.n_leafs =*/ 0,
|
15835
|
+
/*.nodes =*/ { NULL },
|
15836
|
+
/*.grads =*/ { NULL },
|
15837
|
+
/*.leafs =*/ { NULL },
|
15838
|
+
/*.hash_table =*/ { NULL },
|
15839
|
+
/*.perf_runs =*/ 0,
|
15840
|
+
/*.perf_cycles =*/ 0,
|
15841
|
+
/*.perf_time_us =*/ 0,
|
15842
|
+
};
|
15843
|
+
|
15844
|
+
return cgraph;
|
15845
|
+
}
|
15846
|
+
|
15847
|
+
struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
|
15848
|
+
struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
|
15849
|
+
ggml_build_forward_impl(cgraph, tensor, false);
|
15850
|
+
return cgraph;
|
15851
|
+
}
|
15852
|
+
|
15853
|
+
size_t ggml_graph_overhead(void) {
|
15854
|
+
return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
|
15855
|
+
}
|
15856
|
+
|
16111
15857
|
//
|
16112
15858
|
// thread data
|
16113
15859
|
//
|
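ggml_new_graph and ggml_build_forward_ctx, added above, allocate the cgraph inside the ggml context, and ggml_graph_overhead reports how much context memory that costs. A rough usage sketch against the ggml.h of this version; the tensor shapes and the 16 MB margin are illustrative, and ggml_graph_compute_with_ctx is the helper updated further down in this diff.

#include "ggml.h"

int main(void) {
    // reserve room for the tensors, the graph object, and the work buffer
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    // the graph now lives inside ctx instead of on the caller's stack
    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, c);

    // the work buffer is also carved out of ctx
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);

    ggml_free(ctx);
    return 0;
}
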
@@ -16173,7 +15919,7 @@ typedef pthread_t ggml_thread_t;
|
|
16173
15919
|
|
16174
15920
|
// Android's libc implementation "bionic" does not support setting affinity
|
16175
15921
|
#if defined(__linux__) && !defined(__BIONIC__)
|
16176
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) {
|
15922
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) {
|
16177
15923
|
if (!ggml_is_numa()) {
|
16178
15924
|
return;
|
16179
15925
|
}
|
@@ -16198,7 +15944,7 @@ void set_numa_thread_affinity(int thread_n, int n_threads) {
|
|
16198
15944
|
CPU_FREE(cpus);
|
16199
15945
|
}
|
16200
15946
|
|
16201
|
-
void clear_numa_thread_affinity(void) {
|
15947
|
+
static void clear_numa_thread_affinity(void) {
|
16202
15948
|
if (!ggml_is_numa()) {
|
16203
15949
|
return;
|
16204
15950
|
}
|
@@ -16222,8 +15968,8 @@ void clear_numa_thread_affinity(void) {
|
|
16222
15968
|
#else
|
16223
15969
|
// TODO: Windows etc.
|
16224
15970
|
// (the linux implementation may also work on BSD, someone should test)
|
16225
|
-
void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
16226
|
-
void clear_numa_thread_affinity(void) {}
|
15971
|
+
static void set_numa_thread_affinity(int thread_n, int n_threads) { UNUSED(thread_n); UNUSED(n_threads); }
|
15972
|
+
static void clear_numa_thread_affinity(void) {}
|
16227
15973
|
#endif
|
16228
15974
|
|
16229
15975
|
struct ggml_compute_state_shared {
|
@@ -16293,8 +16039,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16293
16039
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16294
16040
|
params.nth = n_tasks_arr[node_n];
|
16295
16041
|
ggml_compute_forward(¶ms, node);
|
16296
|
-
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16297
16042
|
}
|
16043
|
+
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16298
16044
|
}
|
16299
16045
|
|
16300
16046
|
// distribute new work or execute it direct if 1T
|
@@ -16324,8 +16070,9 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
|
16324
16070
|
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
16325
16071
|
params.type = GGML_TASK_FINALIZE;
|
16326
16072
|
ggml_compute_forward(¶ms, node);
|
16327
|
-
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16328
16073
|
}
|
16074
|
+
|
16075
|
+
ggml_graph_compute_perf_stats_node(node, state->shared);
|
16329
16076
|
} else {
|
16330
16077
|
break;
|
16331
16078
|
}
|
@@ -16434,21 +16181,34 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16434
16181
|
case GGML_OP_ARGMAX:
|
16435
16182
|
case GGML_OP_REPEAT:
|
16436
16183
|
case GGML_OP_REPEAT_BACK:
|
16437
|
-
|
16438
|
-
case GGML_OP_SGN:
|
16439
|
-
case GGML_OP_NEG:
|
16440
|
-
case GGML_OP_STEP:
|
16441
|
-
case GGML_OP_TANH:
|
16442
|
-
case GGML_OP_ELU:
|
16443
|
-
case GGML_OP_RELU:
|
16444
|
-
{
|
16184
|
+
{
|
16445
16185
|
n_tasks = 1;
|
16446
16186
|
} break;
|
16447
|
-
|
16448
|
-
case
|
16449
|
-
|
16450
|
-
|
16187
|
+
|
16188
|
+
case GGML_OP_UNARY:
|
16189
|
+
{
|
16190
|
+
switch (ggml_get_unary_op(node)) {
|
16191
|
+
case GGML_UNARY_OP_ABS:
|
16192
|
+
case GGML_UNARY_OP_SGN:
|
16193
|
+
case GGML_UNARY_OP_NEG:
|
16194
|
+
case GGML_UNARY_OP_STEP:
|
16195
|
+
case GGML_UNARY_OP_TANH:
|
16196
|
+
case GGML_UNARY_OP_ELU:
|
16197
|
+
case GGML_UNARY_OP_RELU:
|
16198
|
+
{
|
16199
|
+
n_tasks = 1;
|
16200
|
+
} break;
|
16201
|
+
|
16202
|
+
case GGML_UNARY_OP_GELU:
|
16203
|
+
case GGML_UNARY_OP_GELU_QUICK:
|
16204
|
+
case GGML_UNARY_OP_SILU:
|
16205
|
+
{
|
16206
|
+
n_tasks = n_threads;
|
16207
|
+
} break;
|
16208
|
+
}
|
16209
|
+
} break;
|
16451
16210
|
case GGML_OP_SILU_BACK:
|
16211
|
+
case GGML_OP_MUL:
|
16452
16212
|
case GGML_OP_NORM:
|
16453
16213
|
case GGML_OP_RMS_NORM:
|
16454
16214
|
case GGML_OP_RMS_NORM_BACK:
|
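In ggml_graph_plan, the cheap element-wise unary ops above keep n_tasks = 1 while GELU, GELU_QUICK and SILU are spread over all threads. The per-task work then follows a rows-per-thread split of the kind the compute kernels use; a standalone sketch of that partitioning with made-up row counts:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
    const int nr  = 10;  /* total rows to process */
    const int nth = 4;   /* n_tasks for this op   */

    for (int ith = 0; ith < nth; ++ith) {
        const int dr  = (nr + nth - 1) / nth;  /* rows per task, rounded up */
        const int ir0 = dr * ith;              /* first row for this task   */
        const int ir1 = MIN(ir0 + dr, nr);     /* one past the last row     */
        printf("task %d: rows [%d, %d)\n", ith, ir0, ir1);
    }
    return 0;
}
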
@@ -16513,10 +16273,10 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16513
16273
|
case GGML_OP_GET_ROWS:
|
16514
16274
|
case GGML_OP_GET_ROWS_BACK:
|
16515
16275
|
case GGML_OP_DIAG:
|
16516
|
-
case GGML_OP_DIAG_MASK_ZERO:
|
16517
16276
|
{
|
16518
16277
|
n_tasks = 1;
|
16519
16278
|
} break;
|
16279
|
+
case GGML_OP_DIAG_MASK_ZERO:
|
16520
16280
|
case GGML_OP_DIAG_MASK_INF:
|
16521
16281
|
case GGML_OP_SOFT_MAX:
|
16522
16282
|
case GGML_OP_SOFT_MAX_BACK:
|
@@ -16575,19 +16335,22 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|
16575
16335
|
const int64_t ne11 = node->src[1]->ne[1]; // H
|
16576
16336
|
const int64_t ne12 = node->src[1]->ne[2]; // C
|
16577
16337
|
|
16338
|
+
const int64_t ne0 = node->ne[0];
|
16339
|
+
const int64_t ne1 = node->ne[1];
|
16340
|
+
const int64_t ne2 = node->ne[2];
|
16578
16341
|
const int64_t nk = ne00*ne01;
|
16342
|
+
const int64_t ew0 = nk * ne02;
|
16579
16343
|
|
16580
|
-
UNUSED(ne02);
|
16581
16344
|
UNUSED(ne03);
|
16582
|
-
UNUSED(
|
16345
|
+
UNUSED(ne2);
|
16583
16346
|
|
16584
16347
|
size_t cur = 0;
|
16585
16348
|
|
16586
16349
|
if (node->src[0]->type == GGML_TYPE_F16 &&
|
16587
|
-
|
16588
|
-
cur = sizeof(ggml_fp16_t)*(
|
16350
|
+
node->src[1]->type == GGML_TYPE_F32) {
|
16351
|
+
cur = sizeof(ggml_fp16_t)*(ne0*ne1*ew0);
|
16589
16352
|
} else if (node->src[0]->type == GGML_TYPE_F32 &&
|
16590
|
-
|
16353
|
+
node->src[1]->type == GGML_TYPE_F32) {
|
16591
16354
|
cur = sizeof(float)* (ne10*ne11*ne12);
|
16592
16355
|
} else {
|
16593
16356
|
GGML_ASSERT(false);
|
@@ -16806,10 +16569,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
|
|
16806
16569
|
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
|
16807
16570
|
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
|
16808
16571
|
|
16809
|
-
struct
|
16810
|
-
GGML_ASSERT(buf);
|
16572
|
+
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
|
16811
16573
|
|
16812
|
-
cplan.work_data =
|
16574
|
+
cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
|
16813
16575
|
|
16814
16576
|
ggml_graph_compute(cgraph, &cplan);
|
16815
16577
|
}
|
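ggml_graph_compute_with_ctx now takes its work buffer from the context as a GGML_OBJECT_WORK_BUFFER object instead of asserting on a separately allocated scratch buffer. Callers that manage memory themselves can keep using the plan/compute pair directly; a hedged sketch follows, where the malloc-backed buffer is just one possible choice.

#include "ggml.h"
#include <stdint.h>
#include <stdlib.h>

/* assumes `graph` was built with ggml_build_forward_ctx as sketched earlier */
static void compute_with_own_buffer(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan cplan = ggml_graph_plan(graph, n_threads);

    /* provide the scratch space the plan asked for */
    uint8_t * work = NULL;
    if (cplan.work_size > 0) {
        work = malloc(cplan.work_size);
        cplan.work_data = work;
    }

    ggml_graph_compute(graph, &cplan);

    free(work);
}
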
@@ -16864,9 +16626,6 @@ static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char
|
|
16864
16626
|
}
|
16865
16627
|
|
16866
16628
|
void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
16867
|
-
//assert(cgraph->work == NULL);
|
16868
|
-
//assert(cgraph->work_size == 0);
|
16869
|
-
|
16870
16629
|
uint64_t size_eval = 0;
|
16871
16630
|
|
16872
16631
|
// compute size of intermediate results
|
@@ -16963,7 +16722,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16963
16722
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16964
16723
|
}
|
16965
16724
|
|
16966
|
-
fwrite(tensor->name,
|
16725
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16726
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
16967
16727
|
|
16968
16728
|
// dump the data
|
16969
16729
|
// TODO: pad this to 32 byte boundary
|
@@ -16996,7 +16756,8 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
|
|
16996
16756
|
fwrite(&nb, sizeof(uint64_t), 1, fout);
|
16997
16757
|
}
|
16998
16758
|
|
16999
|
-
fwrite(tensor->name,
|
16759
|
+
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
|
16760
|
+
fwrite(tensor->op_params, sizeof(char), GGML_MAX_OP_PARAMS, fout);
|
17000
16761
|
|
17001
16762
|
// output the op arguments
|
17002
16763
|
{
|
@@ -17177,7 +16938,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17177
16938
|
|
17178
16939
|
tensor->op = (enum ggml_op) op;
|
17179
16940
|
|
17180
|
-
memcpy(tensor->name,
|
16941
|
+
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
|
16942
|
+
memcpy(tensor->op_params, ptr, GGML_MAX_OP_PARAMS); ptr += GGML_MAX_OP_PARAMS;
|
17181
16943
|
|
17182
16944
|
tensor->data = (void *) ptr;
|
17183
16945
|
|
@@ -17222,7 +16984,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17222
16984
|
nb[j] = nb_cur;
|
17223
16985
|
}
|
17224
16986
|
|
17225
|
-
const char * ptr_name
|
16987
|
+
const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
|
16988
|
+
const char * ptr_op_params = ptr; ptr += GGML_MAX_OP_PARAMS;
|
17226
16989
|
|
17227
16990
|
const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += GGML_MAX_SRC*sizeof(int32_t);
|
17228
16991
|
|
@@ -17259,8 +17022,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17259
17022
|
{
|
17260
17023
|
tensor = ggml_view_4d(*ctx_eval, args[0], ne[0], ne[1], ne[2], ne[3], 0, 0, 0, 0);
|
17261
17024
|
|
17262
|
-
|
17263
|
-
memcpy(&offs,
|
17025
|
+
size_t offs;
|
17026
|
+
memcpy(&offs, ptr_op_params, sizeof(offs));
|
17264
17027
|
|
17265
17028
|
tensor->data = ((char *) tensor->data) + offs;
|
17266
17029
|
} break;
|
@@ -17280,7 +17043,8 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
|
|
17280
17043
|
} break;
|
17281
17044
|
}
|
17282
17045
|
|
17283
|
-
memcpy(tensor->name,
|
17046
|
+
memcpy(tensor->name, ptr_name, GGML_MAX_NAME);
|
17047
|
+
memcpy(tensor->op_params, ptr_op_params, GGML_MAX_OP_PARAMS);
|
17284
17048
|
|
17285
17049
|
for (int j = 0; j < GGML_MAX_DIMS; ++j) {
|
17286
17050
|
tensor->nb[j] = nb[j];
|
@@ -17305,9 +17069,6 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17305
17069
|
|
17306
17070
|
GGML_PRINT("=== GRAPH ===\n");
|
17307
17071
|
|
17308
|
-
GGML_PRINT_DEBUG("n_threads = %d\n", cgraph->n_threads);
|
17309
|
-
GGML_PRINT_DEBUG("total work size = %zu bytes\n", cgraph->work_size);
|
17310
|
-
|
17311
17072
|
GGML_PRINT("n_nodes = %d\n", cgraph->n_nodes);
|
17312
17073
|
for (int i = 0; i < cgraph->n_nodes; i++) {
|
17313
17074
|
struct ggml_tensor * node = cgraph->nodes[i];
|
@@ -17317,7 +17078,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17317
17078
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
|
17318
17079
|
i,
|
17319
17080
|
node->ne[0], node->ne[1], node->ne[2],
|
17320
|
-
|
17081
|
+
ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
|
17321
17082
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms(),
|
17322
17083
|
(double) node->perf_cycles / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
|
17323
17084
|
(double) node->perf_time_us / 1000.0,
|
@@ -17331,7 +17092,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17331
17092
|
GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s\n",
|
17332
17093
|
i,
|
17333
17094
|
node->ne[0], node->ne[1],
|
17334
|
-
|
17095
|
+
ggml_op_name(node->op));
|
17335
17096
|
}
|
17336
17097
|
|
17337
17098
|
for (int i = 0; i < GGML_OP_COUNT; i++) {
|
@@ -17339,7 +17100,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
|
|
17339
17100
|
continue;
|
17340
17101
|
}
|
17341
17102
|
|
17342
|
-
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n",
|
17103
|
+
GGML_PRINT("perf_total_per_op_us[%16s] = %7.3f ms\n", ggml_op_name(i), (double) perf_total_per_op_us[i] / 1000.0);
|
17343
17104
|
}
|
17344
17105
|
|
17345
17106
|
GGML_PRINT("========================================\n");
|
@@ -17433,13 +17194,13 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
|
|
17433
17194
|
}
|
17434
17195
|
|
17435
17196
|
if (node->n_dims == 2) {
|
17436
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1],
|
17197
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
|
17437
17198
|
} else {
|
17438
|
-
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2],
|
17199
|
+
fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
|
17439
17200
|
}
|
17440
17201
|
|
17441
17202
|
if (node->grad) {
|
17442
|
-
fprintf(fp, " | <g>%s\"; ]\n",
|
17203
|
+
fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(node->grad->op));
|
17443
17204
|
} else {
|
17444
17205
|
fprintf(fp, "\"; ]\n");
|
17445
17206
|
}
|