llama_cpp 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4970,8 +4970,13 @@ struct ggml_tensor * ggml_rope_back(
         int   n_dims,
         int   mode,
         int   n_ctx,
+        int   n_orig_ctx,
         float freq_base,
         float freq_scale,
+        float ext_factor,
+        float attn_factor,
+        float beta_fast,
+        float beta_slow,
         float xpos_base,
         bool  xpos_down) {
     GGML_ASSERT(ggml_is_vector(b));
@@ -4988,11 +4993,15 @@ struct ggml_tensor * ggml_rope_back(
 
     struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
 
-    int32_t params[8] = { /*n_past*/ 0, n_dims, mode, n_ctx };
-    memcpy(params + 4, &freq_base,  sizeof(float));
-    memcpy(params + 5, &freq_scale, sizeof(float));
-    memcpy(params + 6, &xpos_base,  sizeof(float));
-    memcpy(params + 7, &xpos_down,  sizeof(bool));
+    int32_t params[13] = { /*n_past*/ 0, n_dims, mode, n_ctx, n_orig_ctx };
+    memcpy(params +  5, &freq_base,   sizeof(float));
+    memcpy(params +  6, &freq_scale,  sizeof(float));
+    memcpy(params +  7, &ext_factor,  sizeof(float));
+    memcpy(params +  8, &attn_factor, sizeof(float));
+    memcpy(params +  9, &beta_fast,   sizeof(float));
+    memcpy(params + 10, &beta_slow,   sizeof(float));
+    memcpy(params + 11, &xpos_base,   sizeof(float));
+    memcpy(params + 12, &xpos_down,   sizeof(bool));
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op = GGML_OP_ROPE_BACK;
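
Note on the hunk above: the op_params scratch block grows from 8 to 13 int32 slots so the YaRN parameters (n_orig_ctx, ext_factor, attn_factor, beta_fast, beta_slow) survive the trip to the backward pass. Floats and the trailing bool are bit-copied into 4-byte slots with memcpy and recovered the same way in ggml_compute_backward (see the hunks near new line 15374 below). A minimal, self-contained sketch of that round trip, with illustrative values that are not taken from the package:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void) {
        // pack, mirroring the layout written by ggml_rope_back in this diff
        int32_t params[13] = { /*n_past*/ 0, /*n_dims*/ 128, /*mode*/ 0, /*n_ctx*/ 0, /*n_orig_ctx*/ 4096 };
        float freq_base = 10000.0f, beta_fast = 32.0f;
        memcpy(params + 5, &freq_base, sizeof(float));
        memcpy(params + 9, &beta_fast, sizeof(float));

        // unpack, mirroring the reads in ggml_compute_backward
        float freq_base_out, beta_fast_out;
        memcpy(&freq_base_out, params + 5, sizeof(float));
        memcpy(&beta_fast_out, params + 9, sizeof(float));
        assert(freq_base_out == 10000.0f && beta_fast_out == 32.0f);
        return 0;
    }
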
@@ -10974,7 +10983,8 @@ static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -11033,6 +11043,11 @@ static void ggml_compute_forward_rope_f32(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {
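
Note on the sin_sign addition: a 2D rotation matrix is orthonormal, so its inverse is its transpose, and the transpose of a rotation by theta is a rotation by -theta. Because cos(-theta) = cos(theta) and sin(-theta) = -sin(theta), the inverse rotation only has to flip the sign of the sine, which is exactly what multiplying by sin_sign = -1.0f does. A small self-contained check of the identity (illustrative code, not from the package):

    #include <assert.h>
    #include <math.h>

    // rotate the pair (x0, x1) by theta; sin_sign = -1.0f selects the inverse rotation
    static void rotate_pair(float theta, float sin_sign, float * x0, float * x1) {
        const float c = cosf(theta);
        const float s = sinf(theta) * sin_sign;
        const float y0 = *x0 * c - *x1 * s;
        const float y1 = *x0 * s + *x1 * c;
        *x0 = y0;
        *x1 = y1;
    }

    int main(void) {
        float x0 = 0.25f, x1 = -1.5f;
        rotate_pair(0.7f,  1.0f, &x0, &x1);  // forward RoPE-style rotation
        rotate_pair(0.7f, -1.0f, &x0, &x1);  // backward: same code path, sign-flipped sin
        assert(fabsf(x0 - 0.25f) < 1e-5f && fabsf(x1 + 1.5f) < 1e-5f);
        return 0;
    }
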
@@ -11049,9 +11064,9 @@ static void ggml_compute_forward_rope_f32(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base  *= theta_scale;
                     block_theta *= theta_scale;
@@ -11075,6 +11090,7 @@ static void ggml_compute_forward_rope_f32(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     // zeta scaling for xPos only:
                     float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
@@ -11105,6 +11121,7 @@ static void ggml_compute_forward_rope_f32(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 
@@ -11130,7 +11147,8 @@ static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        struct ggml_tensor * dst,
+        const bool forward) {
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -11182,6 +11200,11 @@ static void ggml_compute_forward_rope_f16(
     const bool is_neox = mode & 2;
     const bool is_glm  = mode & 4;
 
+    // backward process uses inverse rotation by cos and sin.
+    // cos and sin build a rotation matrix, where the inverse is the transpose.
+    // this essentially just switches the sign of sin.
+    const float sin_sign = forward ? 1.0f : -1.0f;
+
     const int32_t * pos = (const int32_t *) src1->data;
 
     for (int64_t i3 = 0; i3 < ne3; i3++) {
@@ -11198,9 +11221,9 @@ static void ggml_compute_forward_rope_f16(
                 float block_theta = MAX(p - (n_ctx - 2), 0);
                 for (int64_t i0 = 0; i0 < ne0 / 4; i0++) {
                     const float cos_theta = cosf(theta_base);
-                    const float sin_theta = sinf(theta_base);
+                    const float sin_theta = sinf(theta_base) * sin_sign;
                     const float cos_block_theta = cosf(block_theta);
-                    const float sin_block_theta = sinf(block_theta);
+                    const float sin_block_theta = sinf(block_theta) * sin_sign;
 
                     theta_base  *= theta_scale;
                     block_theta *= theta_scale;
@@ -11224,6 +11247,7 @@ static void ggml_compute_forward_rope_f16(
                     rope_yarn(
                         theta_base, freq_scale, corr_dims, i0, ext_factor, attn_factor, &cos_theta, &sin_theta
                     );
+                    sin_theta *= sin_sign;
 
                     theta_base *= theta_scale;
 
@@ -11250,6 +11274,7 @@ static void ggml_compute_forward_rope_f16(
                             theta_base, freq_scale, corr_dims, cur_rot, ext_factor, attn_factor,
                             &cos_theta, &sin_theta
                         );
+                        sin_theta *= sin_sign;
 
                         theta_base *= theta_scale;
 
@@ -11279,11 +11304,11 @@ static void ggml_compute_forward_rope(
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
-                ggml_compute_forward_rope_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, true);
            } break;
        case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, true);
            } break;
        default:
            {
@@ -11294,216 +11319,6 @@ static void ggml_compute_forward_rope(
 
 // ggml_compute_forward_rope_back
 
-static void ggml_compute_forward_rope_back_f32(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    float freq_base;
-    float freq_scale;
-
-    // these two only relevant for xPos RoPE:
-    float xpos_base;
-    bool  xpos_down;
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-    const int n_ctx  = ((int32_t *) dst->op_params)[3]; UNUSED(n_ctx);
-    memcpy(&freq_base,  (int32_t *) dst->op_params + 4, sizeof(float));
-    memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
-    memcpy(&xpos_base,  (int32_t *) dst->op_params + 6, sizeof(float));
-    memcpy(&xpos_down,  (int32_t *) dst->op_params + 7, sizeof(bool));
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(float));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(freq_base, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = freq_scale * (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        // zeta scaling for xPos only:
-                        float zeta = xpos_base != 0.0f ? powf((i0 + 0.4f * ne0) / (1.4f * ne0), p / xpos_base) : 1.0f;
-                        if (xpos_down) zeta = 1.0f / zeta;
-
-                        theta_base *= theta_scale;
-
-                        const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                        const float dy0 = dy[0];
-                        const float dy1 = dy[1];
-
-                        dx[0] =   dy0*cos_theta*zeta + dy1*sin_theta*zeta;
-                        dx[1] = - dy0*sin_theta*zeta + dy1*cos_theta*zeta;
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const float * const dy = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            float * dx = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                            const float dy0 = dy[0];
-                            const float dy1 = dy[n_dims/2];
-
-                            dx[0]        =   dy0*cos_theta + dy1*sin_theta;
-                            dx[n_dims/2] = - dy0*sin_theta + dy1*cos_theta;
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
-static void ggml_compute_forward_rope_back_f16(
-        const struct ggml_compute_params * params,
-        const struct ggml_tensor * src0,
-        const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
-
-    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
-        return;
-    }
-
-    // y = rope(x, src1)
-    // dx = rope_back(dy, src1)
-    // src0 is dy, src1 contains options
-
-    //const int n_past = ((int32_t *) dst->op_params)[0];
-    const int n_dims = ((int32_t *) dst->op_params)[1];
-    const int mode   = ((int32_t *) dst->op_params)[2];
-
-    GGML_TENSOR_UNARY_OP_LOCALS
-
-    //printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
-    //printf("n_past = %d, ne2 = %d\n", n_past, ne2);
-
-    assert(nb0 == sizeof(ggml_fp16_t));
-
-    const int ith = params->ith;
-    const int nth = params->nth;
-
-    const int nr = ggml_nrows(dst);
-
-    // rows per thread
-    const int dr = (nr + nth - 1)/nth;
-
-    // row range for this thread
-    const int ir0 = dr*ith;
-    const int ir1 = MIN(ir0 + dr, nr);
-
-    // row index used to determine which thread to use
-    int ir = 0;
-
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
-
-    const bool is_neox = mode & 2;
-
-    const int32_t * pos = (const int32_t *) src1->data;
-
-    for (int64_t i3 = 0; i3 < ne3; i3++) {
-        for (int64_t i2 = 0; i2 < ne2; i2++) {
-            const int64_t p = pos[i2];
-            for (int64_t i1 = 0; i1 < ne1; i1++) {
-                if (ir++ < ir0) continue;
-                if (ir   > ir1) break;
-
-                float theta_base = (float)p;
-
-                if (!is_neox) {
-                    for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
-                        const float cos_theta = cosf(theta_base);
-                        const float sin_theta = sinf(theta_base);
-
-                        theta_base *= theta_scale;
-
-                        const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                        ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                        const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                        const float dy1 = GGML_FP16_TO_FP32(dy[1]);
-
-                        dx[0] = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                        dx[1] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                    }
-                } else {
-                    for (int64_t ib = 0; ib < ne0/n_dims; ++ib) {
-                        for (int64_t ic = 0; ic < n_dims; ic += 2) {
-                            const float cos_theta = cosf(theta_base);
-                            const float sin_theta = sinf(theta_base);
-
-                            theta_base *= theta_scale;
-
-                            const int64_t i0 = ib*n_dims + ic/2;
-
-                            const ggml_fp16_t * const dy = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
-                            ggml_fp16_t * dx = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
-
-                            const float dy0 = GGML_FP16_TO_FP32(dy[0]);
-                            const float dy1 = GGML_FP16_TO_FP32(dy[n_dims/2]);
-
-                            dx[0]        = GGML_FP32_TO_FP16( dy0*cos_theta + dy1*sin_theta);
-                            dx[n_dims/2] = GGML_FP32_TO_FP16(-dy0*sin_theta + dy1*cos_theta);
-                        }
-                    }
-                }
-            }
-        }
-    }
-}
-
 static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
@@ -11512,11 +11327,11 @@ static void ggml_compute_forward_rope_back(
     switch (src0->type) {
        case GGML_TYPE_F16:
            {
-                ggml_compute_forward_rope_back_f16(params, src0, src1, dst);
+                ggml_compute_forward_rope_f16(params, src0, src1, dst, false);
            } break;
        case GGML_TYPE_F32:
            {
-                ggml_compute_forward_rope_back_f32(params, src0, src1, dst);
+                ggml_compute_forward_rope_f32(params, src0, src1, dst, false);
            } break;
        default:
            {
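
Note: the two dedicated backward kernels deleted above (roughly 210 lines) are now redundant. ggml_compute_forward_rope_back dispatches into the forward kernels with forward = false, so the backward path also picks up the YaRN and GLM handling that the old rope_back kernels lacked. The identity this relies on: for y = R(theta) x with R orthonormal, dL/dx = transpose(R(theta)) dL/dy = R(-theta) dL/dy. A finite-difference sanity check of that identity (standalone sketch, not package code):

    #include <assert.h>
    #include <math.h>

    // forward: y = R(theta) x for one 2D pair
    static void rot(float theta, const float x[2], float y[2]) {
        y[0] = x[0]*cosf(theta) - x[1]*sinf(theta);
        y[1] = x[0]*sinf(theta) + x[1]*cosf(theta);
    }

    int main(void) {
        const float theta = 0.3f;
        const float x[2]  = { 0.5f, -0.2f };
        const float w[2]  = { 1.3f,  0.7f };  // loss L = w . y, so dL/dy = w

        // analytic gradient via the sign-flipped (inverse) rotation: dx = R(-theta) w
        float dx[2];
        rot(-theta, w, dx);

        // finite-difference estimate of dL/dx0 through the forward rotation
        const float eps = 1e-3f;
        float xp[2] = { x[0] + eps, x[1] }, xm[2] = { x[0] - eps, x[1] };
        float yp[2], ym[2];
        rot(theta, xp, yp);
        rot(theta, xm, ym);
        const float fd = ((w[0]*yp[0] + w[1]*yp[1]) - (w[0]*ym[0] + w[1]*ym[1])) / (2*eps);
        assert(fabsf(fd - dx[0]) < 1e-3f);
        return 0;
    }
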
@@ -15559,17 +15374,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 // necessary for llama
                 if (src0->grad) {
                     //const int n_past = ((int32_t *) tensor->op_params)[0];
-                    const int n_dims = ((int32_t *) tensor->op_params)[1];
-                    const int mode   = ((int32_t *) tensor->op_params)[2];
-                    const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
-                    float freq_scale;
-                    float xpos_base;
-                    bool  xpos_down;
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                    const int mode       = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                     src0->grad = ggml_add_or_set(ctx,
                             src0->grad,
@@ -15579,8 +15397,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             n_dims,
                             mode,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down),
                         zero_table);
@@ -15590,17 +15413,20 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             {
                 if (src0->grad) {
                     //const int n_past = ((int32_t *) tensor->op_params)[0];
-                    const int n_dims = ((int32_t *) tensor->op_params)[1];
-                    const int mode   = ((int32_t *) tensor->op_params)[2];
-                    const int n_ctx  = ((int32_t *) tensor->op_params)[3];
-                    float freq_base;
-                    float freq_scale;
-                    float xpos_base;
-                    bool  xpos_down;
-                    memcpy(&freq_base,  (int32_t *) tensor->op_params + 4, sizeof(float));
-                    memcpy(&freq_scale, (int32_t *) tensor->op_params + 5, sizeof(float));
-                    memcpy(&xpos_base,  (int32_t *) tensor->op_params + 6, sizeof(float));
-                    memcpy(&xpos_down,  (int32_t *) tensor->op_params + 7, sizeof(bool));
+                    const int n_dims     = ((int32_t *) tensor->op_params)[1];
+                    const int mode       = ((int32_t *) tensor->op_params)[2];
+                    const int n_ctx      = ((int32_t *) tensor->op_params)[3];
+                    const int n_orig_ctx = ((int32_t *) tensor->op_params)[4];
+                    float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow, xpos_base, xpos_down;
+
+                    memcpy(&freq_base,   (int32_t *) tensor->op_params +  5, sizeof(float));
+                    memcpy(&freq_scale,  (int32_t *) tensor->op_params +  6, sizeof(float));
+                    memcpy(&ext_factor,  (int32_t *) tensor->op_params +  7, sizeof(float));
+                    memcpy(&attn_factor, (int32_t *) tensor->op_params +  8, sizeof(float));
+                    memcpy(&beta_fast,   (int32_t *) tensor->op_params +  9, sizeof(float));
+                    memcpy(&beta_slow,   (int32_t *) tensor->op_params + 10, sizeof(float));
+                    memcpy(&xpos_base,   (int32_t *) tensor->op_params + 11, sizeof(float));
+                    memcpy(&xpos_down,   (int32_t *) tensor->op_params + 12, sizeof(bool));
 
                     src0->grad = ggml_add_or_set(ctx,
                             src0->grad,
@@ -15609,14 +15435,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                             src1,
                             n_dims,
                             mode,
-                            0,
                             n_ctx,
+                            n_orig_ctx,
                             freq_base,
                             freq_scale,
-                            0.0f,
-                            1.0f,
-                            0.0f,
-                            0.0f,
+                            ext_factor,
+                            attn_factor,
+                            beta_fast,
+                            beta_slow,
                             xpos_base,
                             xpos_down,
                             false),
@@ -1372,8 +1372,13 @@ extern "C" {
             int   n_dims,
             int   mode,
             int   n_ctx,
+            int   n_orig_ctx,
             float freq_base,
             float freq_scale,
+            float ext_factor,
+            float attn_factor,
+            float beta_fast,
+            float beta_slow,
             float xpos_base,
             bool  xpos_down);
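
The final hunk extends the matching public declaration inside the extern "C" block (presumably the ggml header) so callers see the new parameters. Below is a hedged usage sketch of the extended call; the leading (ctx, a, b) arguments follow the function body shown earlier in this diff, while the tensor shapes and parameter values (freq_base 10000.0f, beta_fast/beta_slow 32.0f/1.0f, etc.) are illustrative assumptions, not taken from the package:

    // hedged sketch: request the gradient of RoPE w.r.t. its input via the new signature
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params ip = { /*.mem_size =*/ 16*1024*1024, /*.mem_buffer =*/ NULL, /*.no_alloc =*/ false };
        struct ggml_context * ctx = ggml_init(ip);

        // dy: upstream gradient, shape [n_dims, n_head, n_tokens]; pos: one position per token
        struct ggml_tensor * dy  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 128, 8, 4);
        struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 4);

        struct ggml_tensor * dx = ggml_rope_back(
            ctx, dy, pos,
            /*n_dims*/ 128, /*mode*/ 0, /*n_ctx*/ 0, /*n_orig_ctx*/ 4096,
            /*freq_base*/ 10000.0f, /*freq_scale*/ 1.0f,
            /*ext_factor*/ 0.0f, /*attn_factor*/ 1.0f,
            /*beta_fast*/ 32.0f, /*beta_slow*/ 1.0f,
            /*xpos_base*/ 0.0f, /*xpos_down*/ false);
        (void) dx;

        ggml_free(ctx);
        return 0;
    }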