llama_cpp 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/src/ggml-alloc.c +12 -9
- data/ext/llama_cpp/src/ggml-cuda.cu +83 -45
- data/ext/llama_cpp/src/ggml-cuda.h +5 -0
- data/ext/llama_cpp/src/ggml-metal.m +4 -3
- data/ext/llama_cpp/src/ggml.c +78 -252
- data/ext/llama_cpp/src/ggml.h +5 -0
- data/ext/llama_cpp/src/llama.cpp +113 -81
- data/ext/llama_cpp/src/llama.h +5 -5
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.c
CHANGED
@@ -4970,8 +4970,13 @@ struct ggml_tensor * ggml_rope_back(
|
|
4970
4970
|
int n_dims,
|
4971
4971
|
int mode,
|
4972
4972
|
int n_ctx,
|
4973
|
+
int n_orig_ctx,
|
4973
4974
|
float freq_base,
|
4974
4975
|
float freq_scale,
|
4976
|
+
float ext_factor,
|
4977
|
+
float attn_factor,
|
4978
|
+
float beta_fast,
|
4979
|
+
float beta_slow,
|
4975
4980
|
float xpos_base,
|
4976
4981
|
bool xpos_down) {
|
4977
4982
|
GGML_ASSERT(ggml_is_vector(b));
|
@@ -4988,11 +4993,15 @@ struct ggml_tensor * ggml_rope_back(
|
|
4988
4993
|
|
4989
4994
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
4990
4995
|
|
4991
|
-
int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
|
4992
|
-
memcpy(params + 4, &freq_base, sizeof(float));
|
4993
|
-
memcpy(params + 5, &freq_scale, sizeof(float));
|
4994
|
-
memcpy(params + 6, &xpos_base, sizeof(float));
|
4995
|
-
memcpy(params + 7, &xpos_down, sizeof(bool));
|
4996
|
+
int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
|
4997
|
+
memcpy(params + 5, &freq_base, sizeof(float));
|
4998
|
+
memcpy(params + 6, &freq_scale, sizeof(float));
|
4999
|
+
memcpy(params + 7, &ext_factor, sizeof(float));
|
5000
|
+
memcpy(params + 8, &attn_factor, sizeof(float));
|
5001
|
+
memcpy(params + 9, &beta_fast, sizeof(float));
|
5002
|
+
memcpy(params + 10, &beta_slow, sizeof(float));
|
5003
|
+
memcpy(params + 11, &xpos_base, sizeof(float));
|
5004
|
+
memcpy(params + 12, &xpos_down, sizeof(bool));
|
4996
5005
|
ggml_set_op_params(result, params, sizeof(params));
|
4997
5006
|
|
4998
5007
|
result->op = GGML_OP_ROPE_BACK;
|
@@ -10974,7 +10983,8 @@ static void ggml_compute_forward_rope_f32(
|
|
10974
10983
|
const struct ggml_compute_params * params,
|
10975
10984
|
const struct ggml_tensor * src0,
|
10976
10985
|
const struct ggml_tensor * src1,
|
10977
|
-
struct ggml_tensor * dst
|
10986
|
+
struct ggml_tensor * dst,
|
10987
|
+
const bool forward) {
|
10978
10988
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
10979
10989
|
return;
|
10980
10990
|
}
|
@@ -11033,6 +11043,11 @@ static void ggml_compute_forward_rope_f32(
|
|
11033
11043
|
const bool is_neox = mode & 2;
|
11034
11044
|
const bool is_glm = mode & 4;
|
11035
11045
|
|
11046
|
+
// backward process uses inverse rotation by cos and sin.
|
11047
|
+
// cos and sin build a rotation matrix, where the inverse is the transpose.
|
11048
|
+
// this essentially just switches the sign of sin.
|
11049
|
+
const float sin_sign = forward ? 1.0f : -1.0f;
|
11050
|
+
|
11036
11051
|
const int32_t * pos = (const int32_t *) src1->data;
|
11037
11052
|
|
11038
11053
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
@@ -11049,9 +11064,9 @@ static void ggml_compute_forward_rope_f32(
|
|
11049
11064
|
float block_theta = MAX(p - (n_ctx - 2), 0);
|
11050
11065
|
for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
|
11051
11066
|
const float cos_theta = cosf(theta_base);
|
11052
|
-
const float sin_theta = sinf(theta_base);
|
11067
|
+
const float sin_theta = sinf(theta_base) * sin_sign;
|
11053
11068
|
const float cos_block_theta = cosf(block_theta);
|
11054
|
-
const float sin_block_theta = sinf(block_theta);
|
11069
|
+
const float sin_block_theta = sinf(block_theta) * sin_sign;
|
11055
11070
|
|
11056
11071
|
theta_base *= theta_scale;
|
11057
11072
|
block_theta *= theta_scale;
|
@@ -11075,6 +11090,7 @@ static void ggml_compute_forward_rope_f32(
|
|
11075
11090
|
rope_yarn(
|
11076
11091
|
theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
|
11077
11092
|
);
|
11093
|
+
sin_theta *= sin_sign;
|
11078
11094
|
|
11079
11095
|
// zeta scaling for xPos only:
|
11080
11096
|
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
|
@@ -11105,6 +11121,7 @@ static void ggml_compute_forward_rope_f32(
|
|
11105
11121
|
theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
|
11106
11122
|
&cos_theta, &sin_theta
|
11107
11123
|
);
|
11124
|
+
sin_theta *= sin_sign;
|
11108
11125
|
|
11109
11126
|
theta_base *= theta_scale;
|
11110
11127
|
|
@@ -11130,7 +11147,8 @@ static void ggml_compute_forward_rope_f16(
|
|
11130
11147
|
const struct ggml_compute_params * params,
|
11131
11148
|
const struct ggml_tensor * src0,
|
11132
11149
|
const struct ggml_tensor * src1,
|
11133
|
-
struct ggml_tensor * dst
|
11150
|
+
struct ggml_tensor * dst,
|
11151
|
+
const bool forward) {
|
11134
11152
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11135
11153
|
return;
|
11136
11154
|
}
|
@@ -11182,6 +11200,11 @@ static void ggml_compute_forward_rope_f16(
|
|
11182
11200
|
const bool is_neox = mode & 2;
|
11183
11201
|
const bool is_glm = mode & 4;
|
11184
11202
|
|
11203
|
+
// backward process uses inverse rotation by cos and sin.
|
11204
|
+
// cos and sin build a rotation matrix, where the inverse is the transpose.
|
11205
|
+
// this essentially just switches the sign of sin.
|
11206
|
+
const float sin_sign = forward ? 1.0f : -1.0f;
|
11207
|
+
|
11185
11208
|
const int32_t * pos = (const int32_t *) src1->data;
|
11186
11209
|
|
11187
11210
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
@@ -11198,9 +11221,9 @@ static void ggml_compute_forward_rope_f16(
|
|
11198
11221
|
float block_theta = MAX(p - (n_ctx - 2), 0);
|
11199
11222
|
for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
|
11200
11223
|
const float cos_theta = cosf(theta_base);
|
11201
|
-
const float sin_theta = sinf(theta_base);
|
11224
|
+
const float sin_theta = sinf(theta_base) * sin_sign;
|
11202
11225
|
const float cos_block_theta = cosf(block_theta);
|
11203
|
-
const float sin_block_theta = sinf(block_theta);
|
11226
|
+
const float sin_block_theta = sinf(block_theta) * sin_sign;
|
11204
11227
|
|
11205
11228
|
theta_base *= theta_scale;
|
11206
11229
|
block_theta *= theta_scale;
|
@@ -11224,6 +11247,7 @@ static void ggml_compute_forward_rope_f16(
|
|
11224
11247
|
rope_yarn(
|
11225
11248
|
theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
|
11226
11249
|
);
|
11250
|
+
sin_theta *= sin_sign;
|
11227
11251
|
|
11228
11252
|
theta_base *= theta_scale;
|
11229
11253
|
|
@@ -11250,6 +11274,7 @@ static void ggml_compute_forward_rope_f16(
|
|
11250
11274
|
theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
|
11251
11275
|
&cos_theta, &sin_theta
|
11252
11276
|
);
|
11277
|
+
sin_theta *= sin_sign;
|
11253
11278
|
|
11254
11279
|
theta_base *= theta_scale;
|
11255
11280
|
|
@@ -11279,11 +11304,11 @@ static void ggml_compute_forward_rope(
|
|
11279
11304
|
switch (src0->type) {
|
11280
11305
|
case GGML_TYPE_F16:
|
11281
11306
|
{
|
11282
|
-
ggml_compute_forward_rope_f16(params, src0, src1, dst);
|
11307
|
+
ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
|
11283
11308
|
} break;
|
11284
11309
|
case GGML_TYPE_F32:
|
11285
11310
|
{
|
11286
|
-
ggml_compute_forward_rope_f32(params, src0, src1, dst);
|
11311
|
+
ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
|
11287
11312
|
} break;
|
11288
11313
|
default:
|
11289
11314
|
{
|
@@ -11294,216 +11319,6 @@ static void ggml_compute_forward_rope(
|
|
11294
11319
|
|
11295
11320
|
// ggml_compute_forward_rope_back
|
11296
11321
|
|
11297
|
-
static void ggml_compute_forward_rope_back_f32(
|
11298
|
-
const struct ggml_compute_params * params,
|
11299
|
-
const struct ggml_tensor * src0,
|
11300
|
-
const struct ggml_tensor * src1,
|
11301
|
-
struct ggml_tensor * dst) {
|
11302
|
-
|
11303
|
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11304
|
-
return;
|
11305
|
-
}
|
11306
|
-
|
11307
|
-
// y = rope(x, src1)
|
11308
|
-
// dx = rope_back(dy, src1)
|
11309
|
-
// src0 is dy, src1 contains options
|
11310
|
-
|
11311
|
-
float freq_base;
|
11312
|
-
float freq_scale;
|
11313
|
-
|
11314
|
-
// these two only relevant for xPos RoPE:
|
11315
|
-
float xpos_base;
|
11316
|
-
bool xpos_down;
|
11317
|
-
|
11318
|
-
//const int n_past = ((int32_t *) dst->op_params)[0];
|
11319
|
-
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11320
|
-
const int mode = ((int32_t *) dst->op_params)[2];
|
11321
|
-
const int n_ctx = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
|
11322
|
-
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
|
11323
|
-
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
|
11324
|
-
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
|
11325
|
-
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
|
11326
|
-
|
11327
|
-
GGML_TENSOR_UNARY_OP_LOCALS
|
11328
|
-
|
11329
|
-
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
11330
|
-
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
11331
|
-
|
11332
|
-
assert(nb0 == sizeof(float));
|
11333
|
-
|
11334
|
-
const int ith = params->ith;
|
11335
|
-
const int nth = params->nth;
|
11336
|
-
|
11337
|
-
const int nr = ggml_nrows(dst);
|
11338
|
-
|
11339
|
-
// rows per thread
|
11340
|
-
const int dr = (nr + nth - 1)/nth;
|
11341
|
-
|
11342
|
-
// row range for this thread
|
11343
|
-
const int ir0 = dr*ith;
|
11344
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11345
|
-
|
11346
|
-
// row index used to determine which thread to use
|
11347
|
-
int ir = 0;
|
11348
|
-
|
11349
|
-
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
11350
|
-
|
11351
|
-
const bool is_neox = mode & 2;
|
11352
|
-
|
11353
|
-
const int32_t * pos = (const int32_t *) src1->data;
|
11354
|
-
|
11355
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
11356
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
11357
|
-
const int64_t p = pos[i2];
|
11358
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
11359
|
-
if (ir++ < ir0) continue;
|
11360
|
-
if (ir > ir1) break;
|
11361
|
-
|
11362
|
-
float theta_base = freq_scale * (float)p;
|
11363
|
-
|
11364
|
-
if (!is_neox) {
|
11365
|
-
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
11366
|
-
const float cos_theta = cosf(theta_base);
|
11367
|
-
const float sin_theta = sinf(theta_base);
|
11368
|
-
|
11369
|
-
// zeta scaling for xPos only:
|
11370
|
-
float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
|
11371
|
-
if (xpos_down) zeta = 1.0f / zeta;
|
11372
|
-
|
11373
|
-
theta_base *= theta_scale;
|
11374
|
-
|
11375
|
-
const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11376
|
-
float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11377
|
-
|
11378
|
-
const float dy0 = dy[0];
|
11379
|
-
const float dy1 = dy[1];
|
11380
|
-
|
11381
|
-
dx[0] = dy0*cos_theta*zeta + dy1*sin_theta*zeta;
|
11382
|
-
dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
|
11383
|
-
}
|
11384
|
-
} else {
|
11385
|
-
for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
|
11386
|
-
for (int64_t ic = 0; ic < n_dims; ic += 2) {
|
11387
|
-
const float cos_theta = cosf(theta_base);
|
11388
|
-
const float sin_theta = sinf(theta_base);
|
11389
|
-
|
11390
|
-
theta_base *= theta_scale;
|
11391
|
-
|
11392
|
-
const int64_t i0 = ib*n_dims + ic/2;
|
11393
|
-
|
11394
|
-
const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11395
|
-
float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11396
|
-
|
11397
|
-
const float dy0 = dy[0];
|
11398
|
-
const float dy1 = dy[n_dims/2];
|
11399
|
-
|
11400
|
-
dx[0] = dy0*cos_theta + dy1*sin_theta;
|
11401
|
-
dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
|
11402
|
-
}
|
11403
|
-
}
|
11404
|
-
}
|
11405
|
-
}
|
11406
|
-
}
|
11407
|
-
}
|
11408
|
-
}
|
11409
|
-
|
11410
|
-
static void ggml_compute_forward_rope_back_f16(
|
11411
|
-
const struct ggml_compute_params * params,
|
11412
|
-
const struct ggml_tensor * src0,
|
11413
|
-
const struct ggml_tensor * src1,
|
11414
|
-
struct ggml_tensor * dst) {
|
11415
|
-
|
11416
|
-
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
11417
|
-
return;
|
11418
|
-
}
|
11419
|
-
|
11420
|
-
// y = rope(x, src1)
|
11421
|
-
// dx = rope_back(dy, src1)
|
11422
|
-
// src0 is dy, src1 contains options
|
11423
|
-
|
11424
|
-
//const int n_past = ((int32_t *) dst->op_params)[0];
|
11425
|
-
const int n_dims = ((int32_t *) dst->op_params)[1];
|
11426
|
-
const int mode = ((int32_t *) dst->op_params)[2];
|
11427
|
-
|
11428
|
-
GGML_TENSOR_UNARY_OP_LOCALS
|
11429
|
-
|
11430
|
-
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
11431
|
-
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
11432
|
-
|
11433
|
-
assert(nb0 == sizeof(ggml_fp16_t));
|
11434
|
-
|
11435
|
-
const int ith = params->ith;
|
11436
|
-
const int nth = params->nth;
|
11437
|
-
|
11438
|
-
const int nr = ggml_nrows(dst);
|
11439
|
-
|
11440
|
-
// rows per thread
|
11441
|
-
const int dr = (nr + nth - 1)/nth;
|
11442
|
-
|
11443
|
-
// row range for this thread
|
11444
|
-
const int ir0 = dr*ith;
|
11445
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
11446
|
-
|
11447
|
-
// row index used to determine which thread to use
|
11448
|
-
int ir = 0;
|
11449
|
-
|
11450
|
-
const float theta_scale = powf(10000.0, -2.0f/n_dims);
|
11451
|
-
|
11452
|
-
const bool is_neox = mode & 2;
|
11453
|
-
|
11454
|
-
const int32_t * pos = (const int32_t *) src1->data;
|
11455
|
-
|
11456
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
11457
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
11458
|
-
const int64_t p = pos[i2];
|
11459
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
11460
|
-
if (ir++ < ir0) continue;
|
11461
|
-
if (ir > ir1) break;
|
11462
|
-
|
11463
|
-
float theta_base = (float)p;
|
11464
|
-
|
11465
|
-
if (!is_neox) {
|
11466
|
-
for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
|
11467
|
-
const float cos_theta = cosf(theta_base);
|
11468
|
-
const float sin_theta = sinf(theta_base);
|
11469
|
-
|
11470
|
-
theta_base *= theta_scale;
|
11471
|
-
|
11472
|
-
const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11473
|
-
ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11474
|
-
|
11475
|
-
const float dy0 = GGML_FP16_TO_FP32(dy[0]);
|
11476
|
-
const float dy1 = GGML_FP16_TO_FP32(dy[1]);
|
11477
|
-
|
11478
|
-
dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
|
11479
|
-
dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
|
11480
|
-
}
|
11481
|
-
} else {
|
11482
|
-
for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
|
11483
|
-
for (int64_t ic = 0; ic < n_dims; ic += 2) {
|
11484
|
-
const float cos_theta = cosf(theta_base);
|
11485
|
-
const float sin_theta = sinf(theta_base);
|
11486
|
-
|
11487
|
-
theta_base *= theta_scale;
|
11488
|
-
|
11489
|
-
const int64_t i0 = ib*n_dims + ic/2;
|
11490
|
-
|
11491
|
-
const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
11492
|
-
ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
11493
|
-
|
11494
|
-
const float dy0 = GGML_FP16_TO_FP32(dy[0]);
|
11495
|
-
const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
|
11496
|
-
|
11497
|
-
dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
|
11498
|
-
dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
|
11499
|
-
}
|
11500
|
-
}
|
11501
|
-
}
|
11502
|
-
}
|
11503
|
-
}
|
11504
|
-
}
|
11505
|
-
}
|
11506
|
-
|
11507
11322
|
static void ggml_compute_forward_rope_back(
|
11508
11323
|
const struct ggml_compute_params * params,
|
11509
11324
|
const struct ggml_tensor * src0,
|
@@ -11512,11 +11327,11 @@ static void ggml_compute_forward_rope_back(
|
|
11512
11327
|
switch (src0->type) {
|
11513
11328
|
case GGML_TYPE_F16:
|
11514
11329
|
{
|
11515
|
-
|
11330
|
+
ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
|
11516
11331
|
} break;
|
11517
11332
|
case GGML_TYPE_F32:
|
11518
11333
|
{
|
11519
|
-
|
11334
|
+
ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
|
11520
11335
|
} break;
|
11521
11336
|
default:
|
11522
11337
|
{
|
@@ -15559,17 +15374,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15559
15374
|
// necessary for llama
|
15560
15375
|
if (src0->grad) {
|
15561
15376
|
//const int n_past = ((int32_t *) tensor->op_params)[0];
|
15562
|
-
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15563
|
-
const int mode = ((int32_t *) tensor->op_params)[2];
|
15564
|
-
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15565
|
-
|
15566
|
-
float freq_scale;
|
15567
|
-
|
15568
|
-
|
15569
|
-
memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float));
|
15570
|
-
memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15571
|
-
memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15572
|
-
memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(bool));
|
15377
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15378
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15379
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15380
|
+
const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
|
15381
|
+
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
|
15382
|
+
|
15383
|
+
memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15384
|
+
memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15385
|
+
memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
|
15386
|
+
memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
|
15387
|
+
memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
|
15388
|
+
memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
|
15389
|
+
memcpy(&xpos_base, (int32_t *) tensor->op_params + 11, sizeof(float));
|
15390
|
+
memcpy(&xpos_down, (int32_t *) tensor->op_params + 12, sizeof(bool));
|
15573
15391
|
|
15574
15392
|
src0->grad = ggml_add_or_set(ctx,
|
15575
15393
|
src0->grad,
|
@@ -15579,8 +15397,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15579
15397
|
n_dims,
|
15580
15398
|
mode,
|
15581
15399
|
n_ctx,
|
15400
|
+
n_orig_ctx,
|
15582
15401
|
freq_base,
|
15583
15402
|
freq_scale,
|
15403
|
+
ext_factor,
|
15404
|
+
attn_factor,
|
15405
|
+
beta_fast,
|
15406
|
+
beta_slow,
|
15584
15407
|
xpos_base,
|
15585
15408
|
xpos_down),
|
15586
15409
|
zero_table);
|
@@ -15590,17 +15413,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15590
15413
|
{
|
15591
15414
|
if (src0->grad) {
|
15592
15415
|
//const int n_past = ((int32_t *) tensor->op_params)[0];
|
15593
|
-
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15594
|
-
const int mode = ((int32_t *) tensor->op_params)[2];
|
15595
|
-
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15596
|
-
|
15597
|
-
float freq_scale;
|
15598
|
-
|
15599
|
-
|
15600
|
-
memcpy(&freq_base, (int32_t *) tensor->op_params + 4, sizeof(float));
|
15601
|
-
memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15602
|
-
memcpy(&xpos_base, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15603
|
-
memcpy(&xpos_down, (int32_t *) tensor->op_params + 7, sizeof(bool));
|
15416
|
+
const int n_dims = ((int32_t *) tensor->op_params)[1];
|
15417
|
+
const int mode = ((int32_t *) tensor->op_params)[2];
|
15418
|
+
const int n_ctx = ((int32_t *) tensor->op_params)[3];
|
15419
|
+
const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
|
15420
|
+
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
|
15421
|
+
|
15422
|
+
memcpy(&freq_base, (int32_t *) tensor->op_params + 5, sizeof(float));
|
15423
|
+
memcpy(&freq_scale, (int32_t *) tensor->op_params + 6, sizeof(float));
|
15424
|
+
memcpy(&ext_factor, (int32_t *) tensor->op_params + 7, sizeof(float));
|
15425
|
+
memcpy(&attn_factor, (int32_t *) tensor->op_params + 8, sizeof(float));
|
15426
|
+
memcpy(&beta_fast, (int32_t *) tensor->op_params + 9, sizeof(float));
|
15427
|
+
memcpy(&beta_slow, (int32_t *) tensor->op_params + 10, sizeof(float));
|
15428
|
+
memcpy(&xpos_base, (int32_t *) tensor->op_params + 11, sizeof(float));
|
15429
|
+
memcpy(&xpos_down, (int32_t *) tensor->op_params + 12, sizeof(bool));
|
15604
15430
|
|
15605
15431
|
src0->grad = ggml_add_or_set(ctx,
|
15606
15432
|
src0->grad,
|
@@ -15609,14 +15435,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|
15609
15435
|
src1,
|
15610
15436
|
n_dims,
|
15611
15437
|
mode,
|
15612
|
-
0,
|
15613
15438
|
n_ctx,
|
15439
|
+
n_orig_ctx,
|
15614
15440
|
freq_base,
|
15615
15441
|
freq_scale,
|
15616
|
-
|
15617
|
-
|
15618
|
-
|
15619
|
-
|
15442
|
+
ext_factor,
|
15443
|
+
attn_factor,
|
15444
|
+
beta_fast,
|
15445
|
+
beta_slow,
|
15620
15446
|
xpos_base,
|
15621
15447
|
xpos_down,
|
15622
15448
|
false),
|
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -1372,8 +1372,13 @@ extern "C" {
|
|
1372
1372
|
int n_dims,
|
1373
1373
|
int mode,
|
1374
1374
|
int n_ctx,
|
1375
|
+
int n_orig_ctx,
|
1375
1376
|
float freq_base,
|
1376
1377
|
float freq_scale,
|
1378
|
+
float ext_factor,
|
1379
|
+
float attn_factor,
|
1380
|
+
float beta_fast,
|
1381
|
+
float beta_slow,
|
1377
1382
|
float xpos_base,
|
1378
1383
|
bool xpos_down);
|
1379
1384
|
|