llama_cpp 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/src/ggml-alloc.c +12 -9
- data/ext/llama_cpp/src/ggml-cuda.cu +83 -45
- data/ext/llama_cpp/src/ggml-cuda.h +5 -0
- data/ext/llama_cpp/src/ggml-metal.m +4 -3
- data/ext/llama_cpp/src/ggml.c +78 -252
- data/ext/llama_cpp/src/ggml.h +5 -0
- data/ext/llama_cpp/src/llama.cpp +113 -81
- data/ext/llama_cpp/src/llama.h +5 -5
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -4970,8 +4970,13 @@ struct ggml_tensor * ggml_rope_back(
|
|
4970
4970
|
int n_dims,
|
4971
4971
|
int mode,
|
4972
4972
|
int n_ctx,
|
4973
|
+
int n_orig_ctx,
|
4973
4974
|
float freq_base,
|
4974
4975
|
float freq_scale,
|
4976
|
+
float ext_factor,
|
4977
|
+
float attn_factor,
|
4978
|
+
float beta_fast,
|
4979
|
+
float beta_slow,
|
4975
4980
|
float xpos_base,
|
4976
4981
|
bool xpos_down) {
|
4977
4982
|
GGML_ASSERT(ggml_is_vector(b));
|
@@ -4988,11 +4993,15 @@ struct ggml_tensor * ggml_rope_back(
|
|
4988
4993
|
|
4989
4994
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
4990
4995
|
|
4991
|
-
int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
|
4992
|
-
memcpy(params + 4, &freq_base, sizeof(float));
|
4993
|
-
memcpy(params + 5, &freq_scale, sizeof(float));
|
4994
|
-
memcpy(params + 6, &xpos_base, sizeof(float));
|
4995
|
-
memcpy(params + 7, &xpos_down, sizeof(bool));
|
4996
|
+
int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
|
4997
|
+
memcpy(params + 5, &freq_base, sizeof(float));
|
4998
|
+
memcpy(params + 6, &freq_scale, sizeof(float));
|
4999
|
+
memcpy(params + 7, &ext_factor, sizeof(float));
|
5000
|
+
memcpy(params + 8, &attn_factor, sizeof(float));
|
5001
|
+
memcpy(params + 9, &beta_fast, sizeof(float));
|
5002
|
+
memcpy(params + 10, &beta_slow, sizeof(float));
|
5003
|
+
memcpy(params + 11, &xpos_base, sizeof(float));
|
5004
|
+
memcpy(params + 12, &xpos_down, sizeof(bool));
|
4996
5005
|
ggml_set_op_params(result, params, sizeof(params));
|
4997
5006
|
|
4998
5007
|
result->op = GGML_OP_ROPE_BACK;
|
@@ -10974,7 +10983,8 @@ static void ggml_compute_forward_rope_f32(
|
|
10974
10983
|
const struct ggml_compute_params * params,
|
10975
10984
|
const struct ggml_tensor * src0,
|
10976
10985
|
const struct ggml_tensor * src1,
|
10977
|
-
struct ggml_tensor * dst
|
10986
|
+
struct ggml_tensor * dst,
|
10987
|
+
const bool forward) {
|
10978
10988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
10979
10989
|
return;
|
10980
10990
|
}
|
@@ -11033,6 +11043,11 @@ static void ggml_compute_forward_rope_f32(
|
|
11033
11043
|
const bool is_neox = mode & 2;
|
11034
11044
|
const bool is_glm = mode & 4;
|
11035
11045
|
|
11046
|
+
// backward process uses inverse rotation by cos and sin.
|
11047
|
+
// cos and sin build a rotation matrix, where the inverse is the transpose.
|
11048
|
+
// this essentially just switches the sign of sin.
|
11049
|
+
const float sin_sign = forward ? 1.0f : -1.0f;
|
11050
|
+
|
11036
11051
|
const int32_t * pos = (const int32_t *) src1->data;
|
11037
11052
|
|
11038
11053
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
@@ -11049,9 +11064,9 @@ static void ggml_compute_forward_rope_f32(
|
|
11049
11064
|
float block_theta = MAX(p - (n_ctx - 2), 0);
|
11050
11065
|
for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
|
11051
11066
|
const float cos_theta = cosf(theta_base);
|
11052
|
-
const float sin_theta = sinf(theta_base);
|
11067
|
+
const float sin_theta = sinf(theta_base) * sin_sign;
|
11053
11068
|
const float cos_block_theta = cosf(block_theta);
|
11054
|
-
const float sin_block_theta = sinf(block_theta);
|
11069
|
+
const float sin_block_theta = sinf(block_theta) * sin_sign;
|
11055
11070
|
|
11056
11071
|
theta_base *= theta_scale;
|
11057
11072
|
block_theta *= theta_scale;
|
@@ -11075,6 +11090,7 @@ static void ggml_compute_forward_rope_f32(
|
|
11075
11090
|
rope_yarn(
|
11076
11091
|
theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
|
11077
11092
|
);
|
11093
|
+
sin_theta *= sin_sign;
|
11078
11094
|
|
11079
11095
|
// zeta scaling for xPos only:
|
11080
11096
|
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
|
@@ -11105,6 +11121,7 @@ static void ggml_compute_forward_rope_f32(
|
|
11105
11121
|
theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
|
11106
11122
|
&cos_theta, &sin_theta
|
11107
11123
|
);
|
11124
|
+
sin_theta *= sin_sign;
|
11108
11125
|
|
11109
11126
|
theta_base *= theta_scale;
|
11110
11127
|
|
@@ -11130,7 +11147,8 @@ static void ggml_compute_forward_rope_f16(
|
|
11130
11147
|
const struct ggml_compute_params * params,
|
11131
11148
|
const struct ggml_tensor * src0,
|
11132
11149
|
const struct ggml_tensor * src1,
|
11133
|
-
struct ggml_tensor * dst
|
11150
|
+
struct ggml_tensor * dst,
|
11151
|
+
const bool forward) {
|
11134
11152
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11135
11153
|
return;
|
11136
11154
|
}
|
@@ -11182,6 +11200,11 @@ static void ggml_compute_forward_rope_f16(
|
|
11182
11200
|
const bool is_neox = mode & 2;
|
11183
11201
|
const bool is_glm = mode & 4;
|
11184
11202
|
|
11203
|
+
// backward process uses inverse rotation by cos and sin.
|
11204
|
+
// cos and sin build a rotation matrix, where the inverse is the transpose.
|
11205
|
+
// this essentially just switches the sign of sin.
|
11206
|
+
const float sin_sign = forward ? 1.0f : -1.0f;
|
11207
|
+
|
11185
11208
|
const int32_t * pos = (const int32_t *) src1->data;
|
11186
11209
|
|
11187
11210
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
@@ -11198,9 +11221,9 @@ static void ggml_compute_forward_rope_f16(
|
|
11198
11221
|
float block_theta = MAX(p - (n_ctx - 2), 0);
|
11199
11222
|
for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
|
11200
11223
|
const float cos_theta = cosf(theta_base);
|
11201
|
-
const float sin_theta = sinf(theta_base);
|
11224
|
+
const float sin_theta = sinf(theta_base) * sin_sign;
|
11202
11225
|
const float cos_block_theta = cosf(block_theta);
|
11203
|
-
const float sin_block_theta = sinf(block_theta);
|
11226
|
+
const float sin_block_theta = sinf(block_theta) * sin_sign;
|
11204
11227
|
|
11205
11228
|
theta_base *= theta_scale;
|
11206
11229
|
block_theta *= theta_scale;
|
@@ -11224,6 +11247,7 @@ static void ggml_compute_forward_rope_f16(
|
|
11224
11247
|
rope_yarn(
|
11225
11248
|
theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
|
11226
11249
|
);
|
11250
|
+
sin_theta *= sin_sign;
|
11227
11251
|
|
11228
11252
|
theta_base *= theta_scale;
|
11229
11253
|
|
@@ -11250,6 +11274,7 @@ static void ggml_compute_forward_rope_f16(
|
|
11250
11274
|
theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
|
11251
11275
|
&cos_theta, &sin_theta
|
11252
11276
|
);
|
11277
|
+
sin_theta *= sin_sign;
|
11253
11278
|
|
11254
11279
|
theta_base *= theta_scale;
|
11255
11280
|
|
@@ -11279,11 +11304,11 @@ static void ggml_compute_forward_rope(
|
|
11279
11304
|
switch (src0->type) {
|
11280
11305
|
case GGML_TYPE_F16:
|
11281
11306
|
{
|
11282
|
-
ggml_compute_forward_rope_f16(params, src0, src1, dst);
|
11307
|
+
ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
|
11283
11308
|
} break;
|
11284
11309
|
case GGML_TYPE_F32:
|
11285
11310
|
{
|
11286
|
-
ggml_compute_forward_rope_f32(params, src0, src1, dst);
|
11311
|
+
ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
|
11287
11312
|
} break;
|
11288
11313
|
default:
|
11289
11314
|
{
|
@@ -11294,216 +11319,6 @@ static void ggml_compute_forward_rope(
|
|
11294
11319
|
|
11295
11320
|
// ggml_compute_forward_rope_back
|
11296
11321
|
|
11297
|
-
static void ggml_compute_forward_rope_back_f32(
|
11298
|
-
const struct ggml_compute_params * params,
|
11299
|
-
const struct ggml_tensor * src0,
|
11300
|
-
const struct ggml_tensor * src1,
|
11301
|
-
struct ggml_tensor * dst) {
|
11302
|
-
|
11303
|
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11304
|
-
return;
|
11305
|
-
}
|
11306
|
-
|
11307
|
-
// y = rope(x, src1)
|
11308
|
-
// dx = rope_back(dy, src1)
|
11309
|
-
// src0 is dy, src1 contains options
|
11310
|
-
|
11311
|
-
float freq_base;
|
11312
|
-
float freq_scale;
|
11313
|
-
|
11314
|
-
// these two only relevant for xPos RoPE:
|
11315
|
-
float xpos_base;
|
11316
|
-
bool xpos_down;
|
11317
|
-
|
11318
|
-
//const int n_past = ((int32_t *) dst->op_params)[0];
|
11319
|
-
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11320
|
-
const int mode = ((int32_t *) dst->op_params)[2];
|
11321
|
-
const int n_ctx = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
|
11322
|
-
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11323
|
-
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
11324
|
-
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
11325
|
-
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
11326
|
-
|
11327
|
-
GGML_TENSOR_UNARY_OP_LOCALS
|
11328
|
-
|
11329
|
-
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
11330
|
-
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
11331
|
-
|
11332
|
-
assert(nb0 == sizeof(float));
|
11333
|
-
|
11334
|
-
const int ith = params->ith;
|
11335
|
-
const int nth = params->nth;
|
11336
|
-
|
11337
|
-
const int nr = ggml_nrows(dst);
|
11338
|
-
|
11339
|
-
// rows per thread
|
11340
|
-
const int dr = (nr + nth - 1)/nth;
|
11341
|
-
|
11342
|
-
// row range for this thread
|
11343
|
-
const int ir0 = dr*ith;
|
11344
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11345
|
-
|
11346
|
-
// row index used to determine which thread to use
|
11347
|
-
int ir = 0;
|
11348
|
-
|
11349
|
-
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
11350
|
-
|
11351
|
-
const bool is_neox = mode & 2;
|
11352
|
-
|
11353
|
-
const int32_t * pos = (const int32_t *) src1->data;
|
11354
|
-
|
11355
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
11356
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
11357
|
-
const int64_t p = pos[i2];
|
11358
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
11359
|
-
if (ir++ < ir0) continue;
|
11360
|
-
if (ir > ir1) break;
|
11361
|
-
|
11362
|
-
float theta_base = freq_scale * (float)p;
|
11363
|
-
|
11364
|
-
if (!is_neox) {
|
11365
|
-
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
11366
|
-
const float cos_theta = cosf(theta_base);
|
11367
|
-
const float sin_theta = sinf(theta_base);
|
11368
|
-
|
11369
|
-
// zeta scaling for xPos only:
|
11370
|
-
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
|
11371
|
-
if (xpos_down) zeta = 1.0f / zeta;
|
11372
|
-
|
11373
|
-
theta_base *= theta_scale;
|
11374
|
-
|
11375
|
-
const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11376
|
-
float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11377
|
-
|
11378
|
-
const float dy0 = dy[0];
|
11379
|
-
const float dy1 = dy[1];
|
11380
|
-
|
11381
|
-
dx[0] = dy0*cos_theta*zeta + dy1*sin_theta*zeta;
|
11382
|
-
dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
|
11383
|
-
}
|
11384
|
-
} else {
|
11385
|
-
for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
|
11386
|
-
for (int64_t ic = 0; ic < n_dims; ic += 2) {
|
11387
|
-
const float cos_theta = cosf(theta_base);
|
11388
|
-
const float sin_theta = sinf(theta_base);
|
11389
|
-
|
11390
|
-
theta_base *= theta_scale;
|
11391
|
-
|
11392
|
-
const int64_t i0 = ib*n_dims + ic/2;
|
11393
|
-
|
11394
|
-
const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11395
|
-
float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11396
|
-
|
11397
|
-
const float dy0 = dy[0];
|
11398
|
-
const float dy1 = dy[n_dims/2];
|
11399
|
-
|
11400
|
-
dx[0] = dy0*cos_theta + dy1*sin_theta;
|
11401
|
-
dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
|
11402
|
-
}
|
11403
|
-
}
|
11404
|
-
}
|
11405
|
-
}
|
11406
|
-
}
|
11407
|
-
}
|
11408
|
-
}
|
11409
|
-
|
11410
|
-
static void ggml_compute_forward_rope_back_f16(
|
11411
|
-
const struct ggml_compute_params * params,
|
11412
|
-
const struct ggml_tensor * src0,
|
11413
|
-
const struct ggml_tensor * src1,
|
11414
|
-
struct ggml_tensor * dst) {
|
11415
|
-
|
11416
|
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11417
|
-
return;
|
11418
|
-
}
|
11419
|
-
|
11420
|
-
// y = rope(x, src1)
|
11421
|
-
// dx = rope_back(dy, src1)
|
11422
|
-
// src0 is dy, src1 contains options
|
11423
|
-
|
11424
|
-
//const int n_past = ((int32_t *) dst->op_params)[0];
|
11425
|
-
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11426
|
-
const int mode = ((int32_t *) dst->op_params)[2];
|
11427
|
-
|
11428
|
-
GGML_TENSOR_UNARY_OP_LOCALS
|
11429
|
-
|
11430
|
-
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
11431
|
-
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
11432
|
-
|
11433
|
-
assert(nb0 == sizeof(ggml_fp16_t));
|
11434
|
-
|
11435
|
-
const int ith = params->ith;
|
11436
|
-
const int nth = params->nth;
|
11437
|
-
|
11438
|
-
const int nr = ggml_nrows(dst);
|
11439
|
-
|
11440
|
-
// rows per thread
|
11441
|
-
const int dr = (nr + nth - 1)/nth;
|
11442
|
-
|
11443
|
-
// row range for this thread
|
11444
|
-
const int ir0 = dr*ith;
|
11445
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11446
|
-
|
11447
|
-
// row index used to determine which thread to use
|
11448
|
-
int ir = 0;
|
11449
|
-
|
11450
|
-
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
11451
|
-
|
11452
|
-
const bool is_neox = mode & 2;
|
11453
|
-
|
11454
|
-
const int32_t * pos = (const int32_t *) src1->data;
|
11455
|
-
|
11456
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
11457
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
11458
|
-
const int64_t p = pos[i2];
|
11459
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
11460
|
-
if (ir++ < ir0) continue;
|
11461
|
-
if (ir > ir1) break;
|
11462
|
-
|
11463
|
-
float theta_base = (float)p;
|
11464
|
-
|
11465
|
-
if (!is_neox) {
|
11466
|
-
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
11467
|
-
const float cos_theta = cosf(theta_base);
|
11468
|
-
const float sin_theta = sinf(theta_base);
|
11469
|
-
|
11470
|
-
theta_base *= theta_scale;
|
11471
|
-
|
11472
|
-
const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11473
|
-
ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11474
|
-
|
11475
|
-
const float dy0 = GGML_FP16_TO_FP32(dy[0]);
|
11476
|
-
const float dy1 = GGML_FP16_TO_FP32(dy[1]);
|
11477
|
-
|
11478
|
-
dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
|
11479
|
-
dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
|
11480
|
-
}
|
11481
|
-
} else {
|
11482
|
-
for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
|
11483
|
-
for (int64_t ic = 0; ic < n_dims; ic += 2) {
|
11484
|
-
const float cos_theta = cosf(theta_base);
|
11485
|
-
const float sin_theta = sinf(theta_base);
|
11486
|
-
|
11487
|
-
theta_base *= theta_scale;
|
11488
|
-
|
11489
|
-
const int64_t i0 = ib*n_dims + ic/2;
|
11490
|
-
|
11491
|
-
const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11492
|
-
ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11493
|
-
|
11494
|
-
const float dy0 = GGML_FP16_TO_FP32(dy[0]);
|
11495
|
-
const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
|
11496
|
-
|
11497
|
-
dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
|
11498
|
-
dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
|
11499
|
-
}
|
11500
|
-
}
|
11501
|
-
}
|
11502
|
-
}
|
11503
|
-
}
|
11504
|
-
}
|
11505
|
-
}
|
11506
|
-
|
11507
11322
|
static void ggml_compute_forward_rope_back(
|
11508
11323
|
const struct ggml_compute_params * params,
|
11509
11324
|
const struct ggml_tensor * src0,
|
@@ -11512,11 +11327,11 @@ static void ggml_compute_forward_rope_back(
|
|
11512
11327
|
switch (src0->type) {
|
11513
11328
|
case GGML_TYPE_F16:
|
11514
11329
|
{
|
11515
|
-
|
11330
|
+
ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
|
11516
11331
|
} break;
|
11517
11332
|
case GGML_TYPE_F32:
|
11518
11333
|
{
|
11519
|
-
|
11334
|
+
ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
|
11520
11335
|
} break;
|
11521
11336
|
default:
|
11522
11337
|
{
|
@@ -15559,17 +15374,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15559
15374
|
// necessary for llama
|
15560
15375
|
if (src0->grad) {
|
15561
15376
|
//const int n_past = ((int32_t *) tensor->op_params)[0];
|
15562
|
-
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15563
|
-
const int mode = ((int32_t *) tensor->op_params)[2];
|
15564
|
-
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15565
|
-
|
15566
|
-
float freq_scale;
|
15567
|
-
|
15568
|
-
|
15569
|
-
memcpy(&
|
15570
|
-
memcpy(&
|
15571
|
-
memcpy(&
|
15572
|
-
memcpy(&
|
15377
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15378
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15379
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15380
|
+
const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
|
15381
|
+
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
|
15382
|
+
|
15383
|
+
memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15384
|
+
memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15385
|
+
memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
|
15386
|
+
memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
|
15387
|
+
memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
|
15388
|
+
memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
|
15389
|
+
memcpy(&xpos_base, (int32_t *) tensor->op_params + 11, sizeof(float));
|
15390
|
+
memcpy(&xpos_down, (int32_t *) tensor->op_params + 12, sizeof(bool));
|
15573
15391
|
|
15574
15392
|
src0->grad = ggml_add_or_set(ctx,
|
15575
15393
|
src0->grad,
|
@@ -15579,8 +15397,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15579
15397
|
n_dims,
|
15580
15398
|
mode,
|
15581
15399
|
n_ctx,
|
15400
|
+
n_orig_ctx,
|
15582
15401
|
freq_base,
|
15583
15402
|
freq_scale,
|
15403
|
+
ext_factor,
|
15404
|
+
attn_factor,
|
15405
|
+
beta_fast,
|
15406
|
+
beta_slow,
|
15584
15407
|
xpos_base,
|
15585
15408
|
xpos_down),
|
15586
15409
|
zero_table);
|
@@ -15590,17 +15413,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15590
15413
|
{
|
15591
15414
|
if (src0->grad) {
|
15592
15415
|
//const int n_past = ((int32_t *) tensor->op_params)[0];
|
15593
|
-
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15594
|
-
const int mode = ((int32_t *) tensor->op_params)[2];
|
15595
|
-
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15596
|
-
|
15597
|
-
float freq_scale;
|
15598
|
-
|
15599
|
-
|
15600
|
-
memcpy(&
|
15601
|
-
memcpy(&
|
15602
|
-
memcpy(&
|
15603
|
-
memcpy(&
|
15416
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15417
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15418
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15419
|
+
const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
|
15420
|
+
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
|
15421
|
+
|
15422
|
+
memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15423
|
+
memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15424
|
+
memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
|
15425
|
+
memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
|
15426
|
+
memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
|
15427
|
+
memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
|
15428
|
+
memcpy(&xpos_base, (int32_t *) tensor->op_params + 11, sizeof(float));
|
15429
|
+
memcpy(&xpos_down, (int32_t *) tensor->op_params + 12, sizeof(bool));
|
15604
15430
|
|
15605
15431
|
src0->grad = ggml_add_or_set(ctx,
|
15606
15432
|
src0->grad,
|
@@ -15609,14 +15435,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15609
15435
|
src1,
|
15610
15436
|
n_dims,
|
15611
15437
|
mode,
|
15612
|
-
0,
|
15613
15438
|
n_ctx,
|
15439
|
+
n_orig_ctx,
|
15614
15440
|
freq_base,
|
15615
15441
|
freq_scale,
|
15616
|
-
|
15617
|
-
|
15618
|
-
|
15619
|
-
|
15442
|
+
ext_factor,
|
15443
|
+
attn_factor,
|
15444
|
+
beta_fast,
|
15445
|
+
beta_slow,
|
15620
15446
|
xpos_base,
|
15621
15447
|
xpos_down,
|
15622
15448
|
false),
|
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -1372,8 +1372,13 @@ extern "C" {
|
|
1372
1372
|
int n_dims,
|
1373
1373
|
int mode,
|
1374
1374
|
int n_ctx,
|
1375
|
+
int n_orig_ctx,
|
1375
1376
|
float freq_base,
|
1376
1377
|
float freq_scale,
|
1378
|
+
float ext_factor,
|
1379
|
+
float attn_factor,
|
1380
|
+
float beta_fast,
|
1381
|
+
float beta_slow,
|
1377
1382
|
float xpos_base,
|
1378
1383
|
bool xpos_down);
|
1379
1384
|
|