llama_cpp 0.9.1 → 0.9.2

@@ -4970,8 +4970,13 @@ struct ggml_tensor * ggml_rope_back(
         int                   n_dims,
         int                   mode,
         int                   n_ctx,
+        int                   n_orig_ctx,
         float                 freq_base,
         float                 freq_scale,
+        float                 ext_factor,
+        float                 attn_factor,
+        float                 beta_fast,
+        float                 beta_slow,
         float                 xpos_base,
         bool                  xpos_down) {
     GGML_ASSERT(ggml_is_vector(b));
@@ -4988,11 +4993,15 @@ struct ggml_tensor * ggml_rope_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
-    int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
-    memcpy(params + 4, &freq_base,  sizeof(float));
-    memcpy(params + 5, &freq_scale, sizeof(float));
-    memcpy(params + 6, &xpos_base,  sizeof(float));
-    memcpy(params + 7, &xpos_down,  sizeof(bool));
+    int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
+    memcpy(params +  5, &freq_base,   sizeof(float));
+    memcpy(params +  6, &freq_scale,  sizeof(float));
+    memcpy(params +  7, &ext_factor,  sizeof(float));
+    memcpy(params +  8, &attn_factor, sizeof(float));
+    memcpy(params +  9, &beta_fast,   sizeof(float));
+    memcpy(params + 10, &beta_slow,   sizeof(float));
+    memcpy(params + 11, &xpos_base,   sizeof(float));
+    memcpy(params + 12, &xpos_down,   sizeof(bool));
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE_BACK;
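
The hunks above widen `ggml_rope_back`'s op-params block from 8 to 13 `int32_t` slots so the YaRN parameters (`n_orig_ctx`, `ext_factor`, `attn_factor`, `beta_fast`, `beta_slow`) survive into the backward pass. Floats (and the trailing bool) are bit-copied into the integer slots with `memcpy` and recovered with the mirrored `memcpy` later in `ggml_compute_backward`. A minimal standalone sketch of that round trip, outside of ggml:

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void) {
    int32_t params[13] = { /*n_past*/ 0 };
    const float freq_base = 10000.0f;

    // store: bit-copy the float into slot 5, as ggml_rope_back does
    memcpy(params + 5, &freq_base, sizeof(float));

    // load: the backward pass recovers it with the mirrored memcpy
    float out;
    memcpy(&out, params + 5, sizeof(float));
    assert(out == freq_base);   // exact: same bits, no conversion
    return 0;
}
```

This relies on `sizeof(float) == sizeof(int32_t)`, which ggml assumes throughout its op-params handling.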
@@ -10974,7 +10983,8 @@ static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -11033,6 +11043,11 @@ static void ggml_compute_forward_rope_f32(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {
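
The comment in the hunk above is the heart of this release: a 2-D rotation matrix is orthogonal, so its inverse is its transpose, and the transpose is the same rotation with the sign of sin flipped:

```latex
R(\theta) = \begin{pmatrix} \cos\theta & -\sin\theta \\ \sin\theta & \cos\theta \end{pmatrix},
\qquad
R(\theta)^{-1} = R(\theta)^{\top}
= \begin{pmatrix} \cos\theta & \sin\theta \\ -\sin\theta & \cos\theta \end{pmatrix}
= R(-\theta).
```

Multiplying `sin_theta` by `sin_sign = -1.0f` therefore turns the forward kernel into the exact inverse rotation, which is all the backward pass needs.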
@@ -11049,9 +11064,9 @@ static void ggml_compute_forward_rope_f32(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base *= theta_scale;
                     block_theta *= theta_scale;
@@ -11075,6 +11090,7 @@ static void ggml_compute_forward_rope_f32(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     // zeta scaling for xPos only:
                     float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
@@ -11105,6 +11121,7 @@ static void ggml_compute_forward_rope_f32(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 
@@ -11130,7 +11147,8 @@ static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -11182,6 +11200,11 @@ static void ggml_compute_forward_rope_f16(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -11198,9 +11221,9 @@ static void ggml_compute_forward_rope_f16(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base *= theta_scale;
                     block_theta *= theta_scale;
@@ -11224,6 +11247,7 @@ static void ggml_compute_forward_rope_f16(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     theta_base *= theta_scale;
 
@@ -11250,6 +11274,7 @@ static void ggml_compute_forward_rope_f16(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 
@@ -11279,11 +11304,11 @@ static void ggml_compute_forward_rope(
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
             } break;
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
             } break;
         default:
             {
@@ -11294,216 +11319,6 @@ static void ggml_compute_forward_rope(
 
 // ggml_compute_forward_rope_back
 
-static void ggml_compute_forward_rope_back_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    float freq_base;
-    float freq_scale;
-
-    // these two only relevant for xPos RoPE:
-    float xpos_base;
-    bool  xpos_down;
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-    const int n_ctx  = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
-    memcpy(&freq_base,  (int32_t *) dst->op_params + 4, sizeof(float));
-    memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
-    memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
-    memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(float));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(freq_base, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = freq_scale * (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        // zeta scaling for xPos only:
-                        float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
-                        if (xpos_down) zeta = 1.0f / zeta;
-
-                        theta_base *= theta_scale;
-
-                        const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                        const float dy0 = dy[0];
-                        const float dy1 = dy[1];
-
-                        dx[0] =   dy0*cos_theta*zeta + dy1*sin_theta*zeta;
-                        dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                            const float dy0 = dy[0];
-                            const float dy1 = dy[n_dims/2];
-
-                            dx[0]        =   dy0*cos_theta + dy1*sin_theta;
-                            dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void ggml_compute_forward_rope_back_f16(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(ggml_fp16_t));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        theta_base *= theta_scale;
-
-                        const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                        const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                        const float dy1 = GGML_FP16_TO_FP32(dy[1]);
-
-                        dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                        dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                            const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                            const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
-
-                            dx[0]        = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                            dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
@@ -11512,11 +11327,11 @@ static void ggml_compute_forward_rope_back(
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_back_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
             } break;
        case GGML_TYPE_F32:
             {
-                ggml_compute_forward_rope_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
             } break;
         default:
             {
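
Why the dedicated `_back` kernels deleted above are redundant: the forward kernel rotates each pair by theta, and the backward kernels applied the transposed rotation to the incoming gradient. Negating `sin_theta` converts one into the other, so a single `forward`-flagged kernel covers both. A minimal standalone sketch of the non-NeoX pair update (ignoring the xPos `zeta` scaling):

```c
#include <stdio.h>

// One rotation pair, as in ggml_compute_forward_rope_f32 after this change.
// sin_sign = +1.0f reproduces the forward rope; sin_sign = -1.0f reproduces
// the deleted ggml_compute_forward_rope_back_f32 inner loop:
//   dx[0] =  dy0*cos_theta + dy1*sin_theta;
//   dx[1] = -dy0*sin_theta + dy1*cos_theta;
static void rotate_pair(float x0, float x1, float cos_theta, float sin_theta,
                        float sin_sign, float * y0, float * y1) {
    sin_theta *= sin_sign;
    *y0 = x0*cos_theta - x1*sin_theta;
    *y1 = x0*sin_theta + x1*cos_theta;
}

int main(void) {
    const float c = 0.8f, s = 0.6f;                 // cos/sin of some angle
    float y0, y1, x0, x1;
    rotate_pair(1.0f, 2.0f, c, s, +1.0f, &y0, &y1); // forward rotation
    rotate_pair(y0,   y1,   c, s, -1.0f, &x0, &x1); // backward = inverse
    printf("%f %f\n", x0, x1);                      // recovers 1.0 2.0
    return 0;
}
```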
@@ -15559,17 +15374,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             // necessary for llama
             if (src0->grad) {
                 //const int n_past = ((int32_t *) tensor->op_params)[0];
-                const int n_dims = ((int32_t *) tensor->op_params)[1];
-                const int mode   = ((int32_t *) tensor->op_params)[2];
-                const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                float freq_base;
-                float freq_scale;
-                float xpos_base;
-                bool  xpos_down;
-                memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                const int mode       = ((int32_t *) tensor->op_params)[2];
+                const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                 src0->grad = ggml_add_or_set(ctx,
                         src0->grad,
@@ -15579,8 +15397,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             n_dims,
                             mode,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down),
                         zero_table);
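
The branch above (gradient of `GGML_OP_ROPE`) and the branch below (gradient of `GGML_OP_ROPE_BACK`) are mirror images, because transposing an orthogonal rotation twice returns the original matrix:

```latex
y = R(\theta)\,x \;\Rightarrow\; \bar{x} = R(\theta)^{\top}\,\bar{y},
\qquad
\left(R(\theta)^{\top}\right)^{\top} = R(\theta).
```

So rope's backward is rope_back, and rope_back's backward is the forward rope with the same hyperparameters.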
@@ -15590,17 +15413,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 if (src0->grad) {
                     //const int n_past = ((int32_t *) tensor->op_params)[0];
-                    const int n_dims = ((int32_t *) tensor->op_params)[1];
-                    const int mode   = ((int32_t *) tensor->op_params)[2];
-                    const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
-                    float freq_scale;
-                    float xpos_base;
-                    bool  xpos_down;
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                    const int mode       = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                     src0->grad = ggml_add_or_set(ctx,
                             src0->grad,
@@ -15609,14 +15435,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             src1,
                             n_dims,
                             mode,
-                            0,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
-                            0.0f,
-                            1.0f,
-                            0.0f,
-                            0.0f,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down,
                             false),
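
The four literals that disappear here were the neutral YaRN settings: `ext_factor = 0.0f` disables the interpolation/extrapolation mix and `attn_factor = 1.0f` leaves the cos/sin magnitude untouched, so the old code silently dropped any non-default YaRN configuration when differentiating through rope_back. The `rope_yarn` helper itself is not part of this diff; the sketch below reconstructs its shape from the call sites above, so treat the body as an assumption rather than the shipped implementation:

```c
#include <math.h>

// Hedged sketch of rope_yarn, inferred from its call sites in this diff:
// rope_yarn(theta, freq_scale, corr_dims, i0, ext_factor, attn_factor, &c, &s).
// `ramp` stands in for the corr_dims/i0-derived mix weight in [0, 1].
static void rope_yarn_sketch(
        float theta_extrap, float freq_scale, float ramp,
        float ext_factor, float attn_factor,
        float * cos_theta, float * sin_theta) {
    const float theta_interp = freq_scale * theta_extrap;
    float theta  = theta_interp;
    float mscale = attn_factor;
    if (ext_factor != 0.0f) {
        // blend interpolated and extrapolated angles over the ramp region,
        // and compensate the attention magnitude for the interpolation
        const float mix = ramp * ext_factor;
        theta   = theta_interp*(1.0f - mix) + theta_extrap*mix;
        mscale *= 1.0f + 0.1f*logf(1.0f/freq_scale);
    }
    // with ext_factor == 0 and attn_factor == 1 this is plain scaled RoPE
    *cos_theta = cosf(theta)*mscale;
    *sin_theta = sinf(theta)*mscale;
}
```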
@@ -1372,8 +1372,13 @@ extern "C" {
             int                   n_dims,
             int                   mode,
             int                   n_ctx,
+            int                   n_orig_ctx,
             float                 freq_base,
             float                 freq_scale,
+            float                 ext_factor,
+            float                 attn_factor,
+            float                 beta_fast,
+            float                 beta_slow,
             float                 xpos_base,
             bool                  xpos_down);
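
For callers, the matching ggml.h declaration above means existing `ggml_rope_back` call sites need five extra arguments. A hedged example follows; the leading `(ctx, a, b)` parameters are assumed unchanged from 0.9.1 (this diff only shows the tail of the signature), and the values are illustrative defaults, not prescribed ones:

```c
// dx = rope_back(dy): `grad` is the incoming gradient dy, `pos` holds one
// int32 position per token (the `b` tensor asserted to be a vector earlier).
struct ggml_tensor * dx = ggml_rope_back(
        ctx, grad, pos,
        /*n_dims*/      128,
        /*mode*/        0,
        /*n_ctx*/       0,
        /*n_orig_ctx*/  0,        // new in 0.9.2: original training context
        /*freq_base*/   10000.0f,
        /*freq_scale*/  1.0f,
        /*ext_factor*/  0.0f,     // new: 0.0f keeps YaRN mixing off
        /*attn_factor*/ 1.0f,     // new: neutral magnitude scaling
        /*beta_fast*/   32.0f,    // new: YaRN correction-range parameters
        /*beta_slow*/   1.0f,     //      (llama.cpp's usual defaults)
        /*xpos_base*/   0.0f,
        /*xpos_down*/   false);
```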