@fugood/llama.node 1.3.2 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CMakeLists.txt +4 -3
- package/package.json +14 -14
- package/scripts/llama.cpp.patch +5 -5
- package/src/llama.cpp/CMakeLists.txt +4 -0
- package/src/llama.cpp/common/CMakeLists.txt +6 -37
- package/src/llama.cpp/common/common.cpp +1 -5
- package/src/llama.cpp/common/download.cpp +47 -29
- package/src/llama.cpp/common/log.cpp +6 -0
- package/src/llama.cpp/common/log.h +2 -0
- package/src/llama.cpp/ggml/include/ggml.h +71 -0
- package/src/llama.cpp/ggml/src/CMakeLists.txt +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/CMakeLists.txt +15 -3
- package/src/llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c +29 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.cpp +283 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kernels.h +1 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +235 -34
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.cpp +289 -277
- package/src/llama.cpp/ggml/src/ggml-cpu/ops.h +4 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/repack.cpp +95 -42
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp +16 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/unary-ops.h +2 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.cpp +17 -0
- package/src/llama.cpp/ggml/src/ggml-cpu/vec.h +10 -0
- package/src/llama.cpp/src/CMakeLists.txt +6 -0
- package/src/llama.cpp/src/llama-arch.cpp +32 -0
- package/src/llama.cpp/src/llama-arch.h +2 -0
- package/src/llama.cpp/src/llama-graph.cpp +2 -1
- package/src/llama.cpp/src/llama-model.cpp +102 -0
- package/src/llama.cpp/src/llama-model.h +2 -0
- package/src/llama.cpp/src/llama-sampling.cpp +10 -5
- package/src/llama.cpp/src/llama-vocab.cpp +16 -1
- package/src/llama.cpp/src/llama-vocab.h +1 -0
- package/src/llama.cpp/src/models/afmoe.cpp +187 -0
- package/src/llama.cpp/src/models/models.h +4 -0
- package/src/llama.cpp/src/unicode.cpp +77 -0
|
@@ -7,8 +7,10 @@
|
|
|
7
7
|
#include "unary-ops.h"
|
|
8
8
|
#include "vec.h"
|
|
9
9
|
|
|
10
|
-
#include <float.h>
|
|
10
|
+
#include <cfloat>
|
|
11
11
|
#include <algorithm>
|
|
12
|
+
#include <cmath>
|
|
13
|
+
#include <functional>
|
|
12
14
|
|
|
13
15
|
// ggml_compute_forward_dup
|
|
14
16
|
|
|
@@ -1394,6 +1396,56 @@ void ggml_compute_forward_sum(
|
|
|
1394
1396
|
}
|
|
1395
1397
|
}
|
|
1396
1398
|
|
|
1399
|
+
// ggml_compute_forward_cumsum
|
|
1400
|
+
|
|
1401
|
+
static void ggml_compute_forward_cumsum_f32(
|
|
1402
|
+
const ggml_compute_params * params,
|
|
1403
|
+
ggml_tensor * dst) {
|
|
1404
|
+
|
|
1405
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
1406
|
+
|
|
1407
|
+
GGML_ASSERT(src0->nb[0] == sizeof(float));
|
|
1408
|
+
GGML_ASSERT(dst->nb[0] == sizeof(float));
|
|
1409
|
+
|
|
1410
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
|
1411
|
+
|
|
1412
|
+
GGML_ASSERT(ne0 == ne00);
|
|
1413
|
+
GGML_ASSERT(ne1 == ne01);
|
|
1414
|
+
GGML_ASSERT(ne2 == ne02);
|
|
1415
|
+
GGML_ASSERT(ne3 == ne03);
|
|
1416
|
+
|
|
1417
|
+
const auto [ir0, ir1] = get_thread_range(params, src0);
|
|
1418
|
+
|
|
1419
|
+
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
|
1420
|
+
const int64_t i03 = ir/(ne02*ne01);
|
|
1421
|
+
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
|
|
1422
|
+
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
|
|
1423
|
+
|
|
1424
|
+
float * src_row = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
|
|
1425
|
+
float * dst_row = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
|
|
1426
|
+
|
|
1427
|
+
ggml_vec_cumsum_f32(ne00, dst_row, src_row);
|
|
1428
|
+
}
|
|
1429
|
+
}
|
|
1430
|
+
|
|
1431
|
+
void ggml_compute_forward_cumsum(
|
|
1432
|
+
const ggml_compute_params * params,
|
|
1433
|
+
ggml_tensor * dst) {
|
|
1434
|
+
|
|
1435
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
1436
|
+
|
|
1437
|
+
switch (src0->type) {
|
|
1438
|
+
case GGML_TYPE_F32:
|
|
1439
|
+
{
|
|
1440
|
+
ggml_compute_forward_cumsum_f32(params, dst);
|
|
1441
|
+
} break;
|
|
1442
|
+
default:
|
|
1443
|
+
{
|
|
1444
|
+
GGML_ABORT("fatal error");
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
}
|
|
1448
|
+
|
|
1397
1449
|
// ggml_compute_forward_sum_rows
|
|
1398
1450
|
|
|
1399
1451
|
static void ggml_compute_forward_sum_rows_f32(
|
|
@@ -2140,6 +2192,83 @@ static void ggml_compute_forward_gelu(
|
|
|
2140
2192
|
}
|
|
2141
2193
|
}
|
|
2142
2194
|
|
|
2195
|
+
// ggml_compute_forward_fill
|
|
2196
|
+
|
|
2197
|
+
static void ggml_compute_forward_fill_f32(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2198
|
+
const float c = ggml_get_op_params_f32(dst, 0);
|
|
2199
|
+
|
|
2200
|
+
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
|
|
2201
|
+
GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
|
|
2202
|
+
|
|
2203
|
+
const auto [ir0, ir1] = get_thread_range(params, dst);
|
|
2204
|
+
|
|
2205
|
+
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
|
2206
|
+
const int64_t i03 = ir/(ne2*ne1);
|
|
2207
|
+
const int64_t i02 = (ir - i03*ne2*ne1)/ne1;
|
|
2208
|
+
const int64_t i01 = (ir - i03*ne2*ne1 - i02*ne1);
|
|
2209
|
+
|
|
2210
|
+
float * dst_ptr = (float *) ((char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
|
|
2211
|
+
|
|
2212
|
+
ggml_vec_set_f32(ne0, dst_ptr, c);
|
|
2213
|
+
}
|
|
2214
|
+
}
|
|
2215
|
+
|
|
2216
|
+
void ggml_compute_forward_fill(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2217
|
+
ggml_compute_forward_fill_f32(params, dst);
|
|
2218
|
+
}
|
|
2219
|
+
|
|
2220
|
+
// ggml_compute_forward_tri
|
|
2221
|
+
|
|
2222
|
+
static void ggml_compute_forward_tri_f32(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2223
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
2224
|
+
|
|
2225
|
+
const ggml_tri_type ttype = (ggml_tri_type) ggml_get_op_params_i32(dst, 0);
|
|
2226
|
+
|
|
2227
|
+
GGML_ASSERT(ggml_is_contiguous(src0));
|
|
2228
|
+
|
|
2229
|
+
GGML_TENSOR_UNARY_OP_LOCALS
|
|
2230
|
+
|
|
2231
|
+
const auto [ir0, ir1] = get_thread_range(params, src0);
|
|
2232
|
+
|
|
2233
|
+
bool (*bipred)(int, int);
|
|
2234
|
+
|
|
2235
|
+
switch (ttype) {
|
|
2236
|
+
case GGML_TRI_TYPE_LOWER: bipred = [](int i, int r) { return i < r; }; break;
|
|
2237
|
+
case GGML_TRI_TYPE_LOWER_DIAG: bipred = [](int i, int r) { return i <= r; }; break;
|
|
2238
|
+
case GGML_TRI_TYPE_UPPER: bipred = [](int i, int r) { return i > r; }; break;
|
|
2239
|
+
case GGML_TRI_TYPE_UPPER_DIAG: bipred = [](int i, int r) { return i >= r; }; break;
|
|
2240
|
+
default: GGML_ABORT("invalid tri type");
|
|
2241
|
+
}
|
|
2242
|
+
|
|
2243
|
+
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
|
2244
|
+
const int64_t i03 = ir/(ne02*ne01);
|
|
2245
|
+
const int64_t i02 = (ir - i03*ne02*ne01)/ne01;
|
|
2246
|
+
const int64_t i01 = (ir - i03*ne02*ne01 - i02*ne01);
|
|
2247
|
+
|
|
2248
|
+
const float * src_ptr = (const float *) ((const char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01);
|
|
2249
|
+
float * dst_ptr = ( float *) (( char *) dst->data + i03*nb3 + i02*nb2 + i01*nb1);
|
|
2250
|
+
|
|
2251
|
+
for (int i0 = 0; i0 < ne0; ++i0) {
|
|
2252
|
+
dst_ptr[i0] = bipred(i0, i01) ? src_ptr[i0] : 0.0f;
|
|
2253
|
+
}
|
|
2254
|
+
}
|
|
2255
|
+
}
|
|
2256
|
+
|
|
2257
|
+
void ggml_compute_forward_tri(const ggml_compute_params * params, ggml_tensor * dst) {
|
|
2258
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
2259
|
+
|
|
2260
|
+
switch (src0->type) {
|
|
2261
|
+
case GGML_TYPE_F32:
|
|
2262
|
+
{
|
|
2263
|
+
ggml_compute_forward_tri_f32(params, dst);
|
|
2264
|
+
} break;
|
|
2265
|
+
default:
|
|
2266
|
+
{
|
|
2267
|
+
GGML_ABORT("fatal error");
|
|
2268
|
+
}
|
|
2269
|
+
}
|
|
2270
|
+
}
|
|
2271
|
+
|
|
2143
2272
|
// ggml_compute_forward_gelu_erf
|
|
2144
2273
|
|
|
2145
2274
|
static void ggml_compute_forward_gelu_erf_f32(
|
|
@@ -5503,194 +5632,28 @@ static void ggml_mrope_cache_init(
|
|
|
5503
5632
|
}
|
|
5504
5633
|
}
|
|
5505
5634
|
|
|
5506
|
-
static void ggml_compute_forward_rope_f32(
|
|
5507
|
-
const ggml_compute_params * params,
|
|
5508
|
-
ggml_tensor * dst,
|
|
5509
|
-
const bool forward) {
|
|
5510
|
-
|
|
5511
|
-
const ggml_tensor * src0 = dst->src[0];
|
|
5512
|
-
const ggml_tensor * src1 = dst->src[1];
|
|
5513
|
-
const ggml_tensor * src2 = dst->src[2];
|
|
5514
|
-
|
|
5515
|
-
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
|
5516
|
-
int sections[4];
|
|
5517
|
-
|
|
5518
|
-
//const int n_past = ((int32_t *) dst->op_params)[0];
|
|
5519
|
-
const int n_dims = ((int32_t *) dst->op_params)[1];
|
|
5520
|
-
const int mode = ((int32_t *) dst->op_params)[2];
|
|
5521
|
-
//const int n_ctx = ((int32_t *) dst->op_params)[3];
|
|
5522
|
-
const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
|
|
5523
|
-
|
|
5524
|
-
memcpy(&freq_base, (int32_t *) dst->op_params + 5, sizeof(float));
|
|
5525
|
-
memcpy(&freq_scale, (int32_t *) dst->op_params + 6, sizeof(float));
|
|
5526
|
-
memcpy(&ext_factor, (int32_t *) dst->op_params + 7, sizeof(float));
|
|
5527
|
-
memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float));
|
|
5528
|
-
memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float));
|
|
5529
|
-
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
|
|
5530
|
-
memcpy(&sections, (int32_t *) dst->op_params + 11, sizeof(int)*4);
|
|
5531
|
-
|
|
5532
|
-
GGML_TENSOR_UNARY_OP_LOCALS
|
|
5533
|
-
|
|
5534
|
-
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
|
5535
|
-
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
|
5536
|
-
|
|
5537
|
-
GGML_ASSERT(nb00 == sizeof(float));
|
|
5538
|
-
|
|
5539
|
-
const int ith = params->ith;
|
|
5540
|
-
const int nth = params->nth;
|
|
5541
|
-
|
|
5542
|
-
const int nr = ggml_nrows(dst);
|
|
5543
|
-
|
|
5544
|
-
GGML_ASSERT(n_dims <= ne0);
|
|
5545
|
-
GGML_ASSERT(n_dims % 2 == 0);
|
|
5546
|
-
|
|
5547
|
-
// rows per thread
|
|
5548
|
-
const int dr = (nr + nth - 1)/nth;
|
|
5549
|
-
|
|
5550
|
-
// row range for this thread
|
|
5551
|
-
const int ir0 = dr*ith;
|
|
5552
|
-
const int ir1 = MIN(ir0 + dr, nr);
|
|
5553
|
-
|
|
5554
|
-
// row index used to determine which thread to use
|
|
5555
|
-
int ir = 0;
|
|
5556
|
-
|
|
5557
|
-
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
|
5558
|
-
|
|
5559
|
-
float corr_dims[2];
|
|
5560
|
-
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
|
5561
|
-
|
|
5562
|
-
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
|
5563
|
-
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE; // ggml_rope_multi, multimodal rotary position embedding
|
|
5564
|
-
const bool is_imrope = mode == GGML_ROPE_TYPE_IMROPE; // qwen3vl apply interleaved mrope
|
|
5565
|
-
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
|
|
5566
|
-
|
|
5567
|
-
if (is_mrope) {
|
|
5568
|
-
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
|
|
5569
|
-
}
|
|
5570
|
-
|
|
5571
|
-
if (is_vision) {
|
|
5572
|
-
GGML_ASSERT(n_dims == ne0/2);
|
|
5573
|
-
}
|
|
5574
|
-
|
|
5575
|
-
const float * freq_factors = NULL;
|
|
5576
|
-
if (src2 != NULL) {
|
|
5577
|
-
GGML_ASSERT(src2->type == GGML_TYPE_F32);
|
|
5578
|
-
GGML_ASSERT(src2->ne[0] >= n_dims / 2);
|
|
5579
|
-
freq_factors = (const float *) src2->data;
|
|
5580
|
-
}
|
|
5581
|
-
|
|
5582
|
-
// backward process uses inverse rotation by cos and sin.
|
|
5583
|
-
// cos and sin build a rotation matrix, where the inverse is the transpose.
|
|
5584
|
-
// this essentially just switches the sign of sin.
|
|
5585
|
-
const float sin_sign = forward ? 1.0f : -1.0f;
|
|
5586
|
-
|
|
5587
|
-
const int32_t * pos = (const int32_t *) src1->data;
|
|
5588
|
-
|
|
5589
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) { // batch
|
|
5590
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) { // seq-len
|
|
5591
|
-
|
|
5592
|
-
float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith;
|
|
5593
|
-
if (!is_mrope) {
|
|
5594
|
-
const int64_t p = pos[i2];
|
|
5595
|
-
ggml_rope_cache_init(p, freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale);
|
|
5596
|
-
}
|
|
5597
|
-
else {
|
|
5598
|
-
const int64_t p_t = pos[i2];
|
|
5599
|
-
const int64_t p_h = pos[i2 + ne2];
|
|
5600
|
-
const int64_t p_w = pos[i2 + ne2 * 2];
|
|
5601
|
-
const int64_t p_e = pos[i2 + ne2 * 3];
|
|
5602
|
-
ggml_mrope_cache_init(
|
|
5603
|
-
p_t, p_h, p_w, p_e, sections, is_imrope, is_vision,
|
|
5604
|
-
freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale);
|
|
5605
|
-
}
|
|
5606
|
-
|
|
5607
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) { // attn-heads
|
|
5608
|
-
if (ir++ < ir0) continue;
|
|
5609
|
-
if (ir > ir1) break;
|
|
5610
|
-
|
|
5611
|
-
if (is_neox || is_mrope) {
|
|
5612
|
-
if (is_vision){
|
|
5613
|
-
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
5614
|
-
const int64_t ic = i0/2;
|
|
5615
|
-
|
|
5616
|
-
const float cos_theta = cache[i0 + 0];
|
|
5617
|
-
const float sin_theta = cache[i0 + 1];
|
|
5618
|
-
|
|
5619
|
-
const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5620
|
-
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5621
|
-
|
|
5622
|
-
const float x0 = src[0];
|
|
5623
|
-
const float x1 = src[n_dims];
|
|
5624
|
-
|
|
5625
|
-
dst_data[0] = x0*cos_theta - x1*sin_theta;
|
|
5626
|
-
dst_data[n_dims] = x0*sin_theta + x1*cos_theta;
|
|
5627
|
-
}
|
|
5628
|
-
} else {
|
|
5629
|
-
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
5630
|
-
const int64_t ic = i0/2;
|
|
5631
|
-
|
|
5632
|
-
const float cos_theta = cache[i0 + 0];
|
|
5633
|
-
const float sin_theta = cache[i0 + 1];
|
|
5634
|
-
|
|
5635
|
-
const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5636
|
-
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5637
|
-
|
|
5638
|
-
const float x0 = src[0];
|
|
5639
|
-
const float x1 = src[n_dims/2];
|
|
5640
|
-
|
|
5641
|
-
dst_data[0] = x0*cos_theta - x1*sin_theta;
|
|
5642
|
-
dst_data[n_dims/2] = x0*sin_theta + x1*cos_theta;
|
|
5643
|
-
}
|
|
5644
|
-
}
|
|
5645
|
-
} else {
|
|
5646
|
-
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
5647
|
-
const float cos_theta = cache[i0 + 0];
|
|
5648
|
-
const float sin_theta = cache[i0 + 1];
|
|
5649
|
-
|
|
5650
|
-
const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
|
5651
|
-
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
5652
|
-
|
|
5653
|
-
const float x0 = src[0];
|
|
5654
|
-
const float x1 = src[1];
|
|
5655
5635
|
|
|
5656
|
-
|
|
5657
|
-
|
|
5658
|
-
|
|
5659
|
-
|
|
5660
|
-
|
|
5661
|
-
if (is_vision) {
|
|
5662
|
-
for (int64_t i0 = n_dims; i0 < ne0; i0 += 2) {
|
|
5663
|
-
const int64_t ic = i0/2;
|
|
5664
|
-
|
|
5665
|
-
const float cos_theta = cache[i0 + 0];
|
|
5666
|
-
const float sin_theta = cache[i0 + 1];
|
|
5636
|
+
template<typename T>
|
|
5637
|
+
static void rotate_pairs(const int64_t n, const int64_t n_offset, const float * cache, const T * src_data, T * dst_data, const int scale = 2) {
|
|
5638
|
+
for (int64_t i0 = 0; i0 < n; i0 += 2) {
|
|
5639
|
+
const int64_t ic = i0/scale; // hack for GGML_ROPE_TYPE_NORMAL, where we need ic = i0; for all other cases, ic = i0/2
|
|
5667
5640
|
|
|
5668
|
-
|
|
5669
|
-
|
|
5641
|
+
const float cos_theta = cache[i0 + 0];
|
|
5642
|
+
const float sin_theta = cache[i0 + 1];
|
|
5670
5643
|
|
|
5671
|
-
|
|
5672
|
-
|
|
5644
|
+
const T * const src = src_data + ic;
|
|
5645
|
+
T * dst = dst_data + ic;
|
|
5673
5646
|
|
|
5674
|
-
|
|
5675
|
-
|
|
5676
|
-
}
|
|
5677
|
-
} else {
|
|
5678
|
-
// fill the remain channels with data from src tensor
|
|
5679
|
-
for (int64_t i0 = n_dims; i0 < ne0; i0 += 2) {
|
|
5680
|
-
const float * const src = (float *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
|
5681
|
-
float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
5647
|
+
const float x0 = type_conversion_table<T>::to_f32(src[0]);
|
|
5648
|
+
const float x1 = type_conversion_table<T>::to_f32(src[n_offset]);
|
|
5682
5649
|
|
|
5683
|
-
|
|
5684
|
-
|
|
5685
|
-
|
|
5686
|
-
}
|
|
5687
|
-
}
|
|
5688
|
-
}
|
|
5689
|
-
}
|
|
5650
|
+
dst[0] = type_conversion_table<T>::from_f32(x0*cos_theta - x1*sin_theta);
|
|
5651
|
+
dst[n_offset] = type_conversion_table<T>::from_f32(x0*sin_theta + x1*cos_theta);
|
|
5652
|
+
}
|
|
5690
5653
|
}
|
|
5691
5654
|
|
|
5692
|
-
|
|
5693
|
-
static void
|
|
5655
|
+
template<typename T> //float or ggml_fp16_t
|
|
5656
|
+
static void ggml_compute_forward_rope_flt(
|
|
5694
5657
|
const ggml_compute_params * params,
|
|
5695
5658
|
ggml_tensor * dst,
|
|
5696
5659
|
const bool forward) {
|
|
@@ -5699,6 +5662,9 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5699
5662
|
const ggml_tensor * src1 = dst->src[1];
|
|
5700
5663
|
const ggml_tensor * src2 = dst->src[2];
|
|
5701
5664
|
|
|
5665
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16);
|
|
5666
|
+
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
|
5667
|
+
|
|
5702
5668
|
float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
|
|
5703
5669
|
int sections[4];
|
|
5704
5670
|
|
|
@@ -5707,6 +5673,7 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5707
5673
|
const int mode = ((int32_t *) dst->op_params)[2];
|
|
5708
5674
|
//const int n_ctx = ((int32_t *) dst->op_params)[3];
|
|
5709
5675
|
const int n_ctx_orig = ((int32_t *) dst->op_params)[4];
|
|
5676
|
+
|
|
5710
5677
|
memcpy(&freq_base, (int32_t *) dst->op_params + 5, sizeof(float));
|
|
5711
5678
|
memcpy(&freq_scale, (int32_t *) dst->op_params + 6, sizeof(float));
|
|
5712
5679
|
memcpy(&ext_factor, (int32_t *) dst->op_params + 7, sizeof(float));
|
|
@@ -5715,13 +5682,13 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5715
5682
|
memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float));
|
|
5716
5683
|
memcpy(&sections, (int32_t *) dst->op_params + 11, sizeof(int)*4);
|
|
5717
5684
|
|
|
5718
|
-
|
|
5719
5685
|
GGML_TENSOR_UNARY_OP_LOCALS
|
|
5720
5686
|
|
|
5721
5687
|
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
|
|
5722
5688
|
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
|
|
5723
5689
|
|
|
5724
|
-
GGML_ASSERT(nb0 == sizeof(ggml_fp16_t));
|
|
5690
|
+
GGML_ASSERT(nb0 == nb00);
|
|
5691
|
+
GGML_ASSERT(nb0 == sizeof(T));
|
|
5725
5692
|
|
|
5726
5693
|
const int ith = params->ith;
|
|
5727
5694
|
const int nth = params->nth;
|
|
@@ -5746,12 +5713,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5746
5713
|
float corr_dims[2];
|
|
5747
5714
|
ggml_rope_yarn_corr_dims(n_dims, n_ctx_orig, freq_base, beta_fast, beta_slow, corr_dims);
|
|
5748
5715
|
|
|
5749
|
-
const bool is_neox = mode & GGML_ROPE_TYPE_NEOX;
|
|
5750
|
-
const bool is_mrope = mode & GGML_ROPE_TYPE_MROPE;
|
|
5751
|
-
const bool is_imrope = mode == GGML_ROPE_TYPE_IMROPE;
|
|
5716
|
+
const bool is_imrope = mode == GGML_ROPE_TYPE_IMROPE; // qwen3vl apply interleaved mrope
|
|
5717
|
+
const bool mrope_used = mode & GGML_ROPE_TYPE_MROPE; // ggml_rope_multi, note: also true for vision (24 & 8 == true) and for imrope
|
|
5752
5718
|
const bool is_vision = mode == GGML_ROPE_TYPE_VISION;
|
|
5753
5719
|
|
|
5754
|
-
if (is_mrope) {
|
|
5720
|
+
if (mrope_used) {
|
|
5755
5721
|
GGML_ASSERT(sections[0] > 0 || sections[1] > 0 || sections[2] > 0);
|
|
5756
5722
|
}
|
|
5757
5723
|
|
|
@@ -5773,11 +5739,11 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5773
5739
|
|
|
5774
5740
|
const int32_t * pos = (const int32_t *) src1->data;
|
|
5775
5741
|
|
|
5776
|
-
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
|
5777
|
-
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
|
5742
|
+
for (int64_t i3 = 0; i3 < ne3; i3++) { // batch
|
|
5743
|
+
for (int64_t i2 = 0; i2 < ne2; i2++) { // seq-len
|
|
5778
5744
|
|
|
5779
5745
|
float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith;
|
|
5780
|
-
if (!is_mrope) {
|
|
5746
|
+
if (!mrope_used) {
|
|
5781
5747
|
const int64_t p = pos[i2];
|
|
5782
5748
|
ggml_rope_cache_init(p, freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale);
|
|
5783
5749
|
}
|
|
@@ -5791,86 +5757,40 @@ static void ggml_compute_forward_rope_f16(
|
|
|
5791
5757
|
freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale);
|
|
5792
5758
|
}
|
|
5793
5759
|
|
|
5794
|
-
for (int64_t i1 = 0; i1 < ne1; i1++) {
|
|
5760
|
+
for (int64_t i1 = 0; i1 < ne1; i1++) { // attn-heads
|
|
5795
5761
|
if (ir++ < ir0) continue;
|
|
5796
5762
|
if (ir > ir1) break;
|
|
5797
5763
|
|
|
5798
|
-
|
|
5799
|
-
|
|
5800
|
-
|
|
5801
|
-
|
|
5802
|
-
|
|
5803
|
-
|
|
5804
|
-
|
|
5805
|
-
|
|
5806
|
-
|
|
5807
|
-
|
|
5808
|
-
|
|
5809
|
-
|
|
5810
|
-
|
|
5811
|
-
|
|
5812
|
-
|
|
5813
|
-
|
|
5814
|
-
|
|
5815
|
-
} else {
|
|
5816
|
-
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
5817
|
-
const int64_t ic = i0/2;
|
|
5818
|
-
|
|
5819
|
-
const float cos_theta = cache[i0 + 0];
|
|
5820
|
-
const float sin_theta = cache[i0 + 1];
|
|
5821
|
-
|
|
5822
|
-
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5823
|
-
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5824
|
-
|
|
5825
|
-
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5826
|
-
const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims/2]);
|
|
5827
|
-
|
|
5828
|
-
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5829
|
-
dst_data[n_dims/2] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5830
|
-
}
|
|
5831
|
-
}
|
|
5832
|
-
} else {
|
|
5833
|
-
for (int64_t i0 = 0; i0 < n_dims; i0 += 2) {
|
|
5834
|
-
const float cos_theta = cache[i0 + 0];
|
|
5835
|
-
const float sin_theta = cache[i0 + 1];
|
|
5836
|
-
|
|
5837
|
-
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
|
5838
|
-
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
5839
|
-
|
|
5840
|
-
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5841
|
-
const float x1 = GGML_CPU_FP16_TO_FP32(src[1]);
|
|
5842
|
-
|
|
5843
|
-
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5844
|
-
dst_data[1] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5845
|
-
}
|
|
5764
|
+
T * src = (T *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01);
|
|
5765
|
+
T * dst_data = (T *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1);
|
|
5766
|
+
|
|
5767
|
+
switch (mode) {
|
|
5768
|
+
case GGML_ROPE_TYPE_NORMAL:
|
|
5769
|
+
rotate_pairs<T>(n_dims, 1, cache, src, dst_data, 1);
|
|
5770
|
+
break;
|
|
5771
|
+
case GGML_ROPE_TYPE_NEOX:
|
|
5772
|
+
case GGML_ROPE_TYPE_MROPE:
|
|
5773
|
+
case GGML_ROPE_TYPE_IMROPE:
|
|
5774
|
+
rotate_pairs<T>(n_dims, n_dims/2, cache, src, dst_data);
|
|
5775
|
+
break;
|
|
5776
|
+
case GGML_ROPE_TYPE_VISION:
|
|
5777
|
+
rotate_pairs<T>(ne0, n_dims, cache, src, dst_data);
|
|
5778
|
+
break;
|
|
5779
|
+
default:
|
|
5780
|
+
GGML_ABORT("rope type not supported");
|
|
5846
5781
|
}
|
|
5847
5782
|
|
|
5848
|
-
if (is_vision) {
|
|
5849
|
-
|
|
5850
|
-
const int64_t ic = i0/2;
|
|
5851
|
-
|
|
5852
|
-
const float cos_theta = cache[i0 + 0];
|
|
5853
|
-
const float sin_theta = cache[i0 + 1];
|
|
5854
|
-
|
|
5855
|
-
const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00);
|
|
5856
|
-
ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0);
|
|
5857
|
-
|
|
5858
|
-
const float x0 = GGML_CPU_FP16_TO_FP32(src[0]);
|
|
5859
|
-
const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims]);
|
|
5860
|
-
|
|
5861
|
-
dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta);
|
|
5862
|
-
dst_data[n_dims] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta);
|
|
5863
|
-
}
|
|
5864
|
-
} else {
|
|
5783
|
+
if (!is_vision) {
|
|
5784
|
+
// fill the remain channels with data from src tensor
|
|
5865
5785
|
for (int64_t i0 = n_dims; i0 < ne0; i0 += 2) {
|
|
5866
|
-
const
|
|
5867
|
-
|
|
5786
|
+
const T * const src = (T *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00);
|
|
5787
|
+
T * dst_data = (T *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
|
5868
5788
|
|
|
5869
5789
|
dst_data[0] = src[0];
|
|
5870
5790
|
dst_data[1] = src[1];
|
|
5871
5791
|
}
|
|
5872
5792
|
}
|
|
5873
|
-
}
|
|
5793
|
+
} //attn-heads
|
|
5874
5794
|
}
|
|
5875
5795
|
}
|
|
5876
5796
|
}
|
|
@@ -5884,11 +5804,11 @@ void ggml_compute_forward_rope(
|
|
|
5884
5804
|
switch (src0->type) {
|
|
5885
5805
|
case GGML_TYPE_F16:
|
|
5886
5806
|
{
|
|
5887
|
-
|
|
5807
|
+
ggml_compute_forward_rope_flt<ggml_fp16_t>(params, dst, true);
|
|
5888
5808
|
} break;
|
|
5889
5809
|
case GGML_TYPE_F32:
|
|
5890
5810
|
{
|
|
5891
|
-
|
|
5811
|
+
ggml_compute_forward_rope_flt<float>(params, dst, true);
|
|
5892
5812
|
} break;
|
|
5893
5813
|
default:
|
|
5894
5814
|
{
|
|
@@ -5908,11 +5828,11 @@ void ggml_compute_forward_rope_back(
|
|
|
5908
5828
|
switch (src0->type) {
|
|
5909
5829
|
case GGML_TYPE_F16:
|
|
5910
5830
|
{
|
|
5911
|
-
|
|
5831
|
+
ggml_compute_forward_rope_flt<ggml_fp16_t>(params, dst, false);
|
|
5912
5832
|
} break;
|
|
5913
5833
|
case GGML_TYPE_F32:
|
|
5914
5834
|
{
|
|
5915
|
-
|
|
5835
|
+
ggml_compute_forward_rope_flt<float>(params, dst, false);
|
|
5916
5836
|
} break;
|
|
5917
5837
|
default:
|
|
5918
5838
|
{
|
|
@@ -7873,6 +7793,18 @@ void ggml_compute_forward_timestep_embedding(
|
|
|
7873
7793
|
|
|
7874
7794
|
// ggml_compute_forward_argsort
|
|
7875
7795
|
|
|
7796
|
+
template<enum ggml_sort_order order>
|
|
7797
|
+
struct argsort_cmp {
|
|
7798
|
+
const float * data;
|
|
7799
|
+
bool operator()(int32_t a, int32_t b) const {
|
|
7800
|
+
if constexpr (order == GGML_SORT_ORDER_ASC) {
|
|
7801
|
+
return data[a] < data[b];
|
|
7802
|
+
} else {
|
|
7803
|
+
return data[a] > data[b];
|
|
7804
|
+
}
|
|
7805
|
+
}
|
|
7806
|
+
};
|
|
7807
|
+
|
|
7876
7808
|
static void ggml_compute_forward_argsort_f32(
|
|
7877
7809
|
const ggml_compute_params * params,
|
|
7878
7810
|
ggml_tensor * dst) {
|
|
@@ -7891,23 +7823,25 @@ static void ggml_compute_forward_argsort_f32(
|
|
|
7891
7823
|
ggml_sort_order order = (ggml_sort_order) ggml_get_op_params_i32(dst, 0);
|
|
7892
7824
|
|
|
7893
7825
|
for (int64_t i = ith; i < nr; i += nth) {
|
|
7894
|
-
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
|
7895
7826
|
const float * src_data = (float *)((char *) src0->data + i*nb01);
|
|
7896
7827
|
|
|
7828
|
+
int32_t * dst_data = (int32_t *)((char *) dst->data + i*nb1);
|
|
7829
|
+
|
|
7897
7830
|
for (int64_t j = 0; j < ne0; j++) {
|
|
7898
7831
|
dst_data[j] = j;
|
|
7899
7832
|
}
|
|
7900
7833
|
|
|
7901
|
-
|
|
7902
|
-
|
|
7903
|
-
|
|
7904
|
-
|
|
7905
|
-
|
|
7906
|
-
|
|
7907
|
-
|
|
7908
|
-
|
|
7909
|
-
|
|
7910
|
-
|
|
7834
|
+
switch (order) {
|
|
7835
|
+
case GGML_SORT_ORDER_ASC:
|
|
7836
|
+
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_ASC>{src_data});
|
|
7837
|
+
break;
|
|
7838
|
+
|
|
7839
|
+
case GGML_SORT_ORDER_DESC:
|
|
7840
|
+
std::sort(dst_data, dst_data + ne0, argsort_cmp<GGML_SORT_ORDER_DESC>{src_data});
|
|
7841
|
+
break;
|
|
7842
|
+
|
|
7843
|
+
default:
|
|
7844
|
+
GGML_ABORT("invalid sort order");
|
|
7911
7845
|
}
|
|
7912
7846
|
}
|
|
7913
7847
|
}
|
|
@@ -8730,7 +8664,7 @@ static void ggml_compute_forward_ssm_scan_f32(
|
|
|
8730
8664
|
// n_head
|
|
8731
8665
|
for (int h = ih0; h < ih1; ++h) {
|
|
8732
8666
|
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
|
|
8733
|
-
const float dt_soft_plus =
|
|
8667
|
+
const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
|
|
8734
8668
|
const float dA = expf(dt_soft_plus * A[h]);
|
|
8735
8669
|
const int g = h / (nh / ng); // repeat_interleave
|
|
8736
8670
|
|
|
@@ -8827,7 +8761,7 @@ static void ggml_compute_forward_ssm_scan_f32(
|
|
|
8827
8761
|
// n_head
|
|
8828
8762
|
for (int h = ih0; h < ih1; ++h) {
|
|
8829
8763
|
// ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16
|
|
8830
|
-
const float dt_soft_plus =
|
|
8764
|
+
const float dt_soft_plus = ggml_compute_softplus_f32(dt[h]);
|
|
8831
8765
|
const int g = h / (nh / ng); // repeat_interleave
|
|
8832
8766
|
|
|
8833
8767
|
// dim
|
|
@@ -9110,6 +9044,14 @@ void ggml_compute_forward_unary(
|
|
|
9110
9044
|
{
|
|
9111
9045
|
ggml_compute_forward_xielu(params, dst);
|
|
9112
9046
|
} break;
|
|
9047
|
+
case GGML_UNARY_OP_EXPM1:
|
|
9048
|
+
{
|
|
9049
|
+
ggml_compute_forward_expm1(params, dst);
|
|
9050
|
+
} break;
|
|
9051
|
+
case GGML_UNARY_OP_SOFTPLUS:
|
|
9052
|
+
{
|
|
9053
|
+
ggml_compute_forward_softplus(params, dst);
|
|
9054
|
+
} break;
|
|
9113
9055
|
default:
|
|
9114
9056
|
{
|
|
9115
9057
|
GGML_ABORT("fatal error");
|
|
@@ -9706,6 +9648,76 @@ void ggml_compute_forward_gla(
|
|
|
9706
9648
|
}
|
|
9707
9649
|
}
|
|
9708
9650
|
|
|
9651
|
+
static void ggml_compute_forward_solve_tri_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
|
|
9652
|
+
const struct ggml_tensor * src0 = dst->src[0]; // A (lower triangular)
|
|
9653
|
+
const struct ggml_tensor * src1 = dst->src[1]; // B (RHS)
|
|
9654
|
+
|
|
9655
|
+
GGML_TENSOR_BINARY_OP_LOCALS;
|
|
9656
|
+
|
|
9657
|
+
GGML_ASSERT(src0->type == GGML_TYPE_F32);
|
|
9658
|
+
GGML_ASSERT(src1->type == GGML_TYPE_F32);
|
|
9659
|
+
GGML_ASSERT(dst->type == GGML_TYPE_F32);
|
|
9660
|
+
|
|
9661
|
+
GGML_ASSERT(ne00 == ne01); // A must be square
|
|
9662
|
+
GGML_ASSERT(ne0 == ne10); // solution cols == B cols
|
|
9663
|
+
GGML_ASSERT(ne1 == ne11); // solution rows == B rows
|
|
9664
|
+
|
|
9665
|
+
GGML_ASSERT(ne02 == ne12 && ne12 == ne2);
|
|
9666
|
+
GGML_ASSERT(ne03 == ne13 && ne13 == ne3);
|
|
9667
|
+
|
|
9668
|
+
const int ith = params->ith;
|
|
9669
|
+
const int nth = params->nth;
|
|
9670
|
+
|
|
9671
|
+
const int64_t k = ne10; // number of RHS columns
|
|
9672
|
+
const int64_t n = ne11; // A is n×n
|
|
9673
|
+
const int64_t nr = ne02 * ne03 * k; // we're parallelizing on columns here, so seq x token x column will be the unit
|
|
9674
|
+
|
|
9675
|
+
// chunks per thread
|
|
9676
|
+
const int64_t dr = (nr + nth - 1)/nth;
|
|
9677
|
+
|
|
9678
|
+
// chunk range for this thread
|
|
9679
|
+
const int64_t ir0 = dr*ith;
|
|
9680
|
+
const int64_t ir1 = MIN(ir0 + dr, nr);
|
|
9681
|
+
|
|
9682
|
+
const float * A = (const float *) src0->data; // [n, n, B1, B2]
|
|
9683
|
+
const float * B = (const float *) src1->data; // [n, k, B1, B2]
|
|
9684
|
+
float * X = ( float *) dst->data; // [n, k, B1, B2]
|
|
9685
|
+
|
|
9686
|
+
for (int64_t ir = ir0; ir < ir1; ++ir) {
|
|
9687
|
+
const int64_t i03 = ir/(ne02*k);
|
|
9688
|
+
const int64_t i02 = (ir - i03*ne02*k)/k;
|
|
9689
|
+
const int64_t i01 = (ir - i03*ne02*k - i02*k);
|
|
9690
|
+
|
|
9691
|
+
const float * A_batch = A + i02 * nb02 / sizeof(float) + i03 * nb03 / sizeof(float);
|
|
9692
|
+
const float * B_batch = B + i02 * nb12 / sizeof(float) + i03 * nb13 / sizeof(float);
|
|
9693
|
+
|
|
9694
|
+
float * X_batch = X + i02 * nb2 / sizeof(float) + i03 * nb3 / sizeof(float);
|
|
9695
|
+
|
|
9696
|
+
for (int64_t i00 = 0; i00 < n; ++i00) {
|
|
9697
|
+
float sum = 0.0f;
|
|
9698
|
+
for (int64_t t = 0; t < i00; ++t) {
|
|
9699
|
+
sum += A_batch[i00 * n + t] * X_batch[i01 * n + t];
|
|
9700
|
+
}
|
|
9701
|
+
|
|
9702
|
+
const float diag = A_batch[i00 * n + i00];
|
|
9703
|
+
GGML_ASSERT(diag != 0.0f && "Zero diagonal in triangular matrix");
|
|
9704
|
+
|
|
9705
|
+
X_batch[i01 * n + i00] = (B_batch[i00 * k + i01] - sum) / diag;
|
|
9706
|
+
}
|
|
9707
|
+
}
|
|
9708
|
+
}
|
|
9709
|
+
|
|
9710
|
+
void ggml_compute_forward_solve_tri(const struct ggml_compute_params * params, struct ggml_tensor * dst) {
|
|
9711
|
+
const ggml_tensor * src0 = dst->src[0];
|
|
9712
|
+
const ggml_tensor * src1 = dst->src[1];
|
|
9713
|
+
|
|
9714
|
+
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32) {
|
|
9715
|
+
ggml_compute_forward_solve_tri_f32(params, dst);
|
|
9716
|
+
} else {
|
|
9717
|
+
GGML_ABORT("fatal error");
|
|
9718
|
+
}
|
|
9719
|
+
}
|
|
9720
|
+
|
|
9709
9721
|
// ggml_compute_forward_rwkv_wkv7
|
|
9710
9722
|
|
|
9711
9723
|
static void ggml_compute_forward_rwkv_wkv7_f32(
|