llama-rb 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +1 -3
- data/README.md +2 -8
- data/bin/console +7 -0
- data/ext/Makefile +4 -0
- data/ext/extconf.rb +10 -0
- data/lib/llama/model.rb +36 -64
- data/lib/llama/version.rb +1 -1
- data/lib/llama.rb +0 -1
- data/llama-rb.gemspec +25 -25
- data/llama.cpp/LICENSE +21 -0
- data/llama.cpp/Makefile +175 -0
- data/llama.cpp/README.md +389 -0
- data/{ext/llama → llama.cpp/examples}/common.cpp +10 -3
- data/llama.cpp/examples/main/main.cpp +460 -0
- data/{ext/llama → llama.cpp}/ggml.c +587 -485
- data/{ext/llama → llama.cpp}/ggml.h +36 -26
- data/{ext/llama → llama.cpp}/llama.cpp +85 -35
- data/{ext/llama → llama.cpp}/llama.h +17 -0
- metadata +18 -27
- data/ext/llama/extconf.rb +0 -12
- data/ext/llama/model.cpp +0 -192
- /data/{ext/llama → llama.cpp/examples}/common.h +0 -0
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdint.h>
+#include <inttypes.h>
 #include <stdio.h>
 #include <float.h>
 
@@ -1961,42 +1962,71 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
 // Initialize accumulator with zeros
 __m256 acc = _mm256_setzero_ps();
 
-
-
-
-#pragma GCC unroll 16
-#endif
-for (int i = 0; i < nb; ++i) {
-// Compute combined scale for the block
-const __m256 d = _mm256_mul_ps( _mm256_broadcast_ss( &x[i].d ), _mm256_broadcast_ss( &y[i].d ) );
-
-// Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes
-__m256i bx = bytesFromNibbles( x[i].qs );
-__m256i by = bytesFromNibbles( y[i].qs );
-
-// Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
-const __m256i off = _mm256_set1_epi8( 8 );
-bx = _mm256_sub_epi8( bx, off );
-by = _mm256_sub_epi8( by, off );
-
-// Get absolute values of x vectors
-const __m256i ax = _mm256_sign_epi8(bx, bx);
-
-// Sign the values of the y vectors
-const __m256i sy = _mm256_sign_epi8(by, bx);
-
-// Perform multiplication and create 16-bit values
-const __m256i dot = _mm256_maddubs_epi16(ax, sy);
-
-const __m256i ones = _mm256_set1_epi16(1);
-const __m256i i32 = _mm256_madd_epi16(ones, dot);
+/* Prepare the constants we will need during execution */
+const __m256i lowMask = _mm256_set1_epi8( 0xF );
+const __m256i offset_8 = _mm256_set1_epi16( 8 );
 
-
-
+#define UNROLL_COUNT 8
+// make sure we only unroll multiples of the block count
+assert(nb % UNROLL_COUNT == 0);
 
-
-
-
+// Main loop
+for (int i = 0; i < nb; i+=UNROLL_COUNT) {
+
+// This loop will be unrolled by the compiler
+for (int u=0;u<UNROLL_COUNT;u++) {
+/* Compute combined scale for the block */
+const __m256 scale = _mm256_mul_ps(
+_mm256_broadcast_ss( &x[i+u].d ),
+_mm256_broadcast_ss( &y[i+u].d ) );
+
+/* get input from x
+Input: 32 Nibbles (16 bytes) at *x[i+u]
+Output: 2 vectors with 16 values of type int16_t (x_high_q, x_low_q) */
+
+/* Load 16 bytes from memory */
+const __m128i tmp_x = _mm_loadu_si128( ( const __m128i* ) x[i+u].qs);
+/* Expand bytes into uint16_t values */
+const __m256i bytes_x = _mm256_cvtepu8_epi16(tmp_x);
+/* Unpack values into individual bytes */
+__m256i x_low_q = _mm256_and_si256( lowMask, bytes_x );
+const __m256i pre_shift_x_high_q = _mm256_andnot_si256( lowMask, bytes_x );
+__m256i x_high_q = _mm256_srli_epi16( pre_shift_x_high_q, 4 );
+/* Now we have two vectors with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. */
+x_high_q = _mm256_sub_epi16( x_high_q, offset_8 );
+x_low_q = _mm256_sub_epi16( x_low_q, offset_8 );
+
+/* get input from y
+Input: 32 Nibbles (16 bytes) at *y[i+u]
+Output: 2 vectors with 16 values of type int16_t (y_high_q, y_low_q) */
+
+/* Load 16 bytes from memory */
+const __m128i tmp_y = _mm_loadu_si128( (const __m128i* ) y[i+u].qs);
+/* Expand bytes into uint16_t values */
+const __m256i bytes_y = _mm256_cvtepu8_epi16(tmp_y);
+/* Unpack values into individual bytes */
+const __m256i pre_shift_y_high_q = _mm256_andnot_si256( lowMask, bytes_y );
+__m256i y_high_q = _mm256_srli_epi16( pre_shift_y_high_q, 4 );
+__m256i y_low_q = _mm256_and_si256( lowMask, bytes_y );
+/* Now we have two vectors with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval. */
+y_high_q = _mm256_sub_epi16( y_high_q, offset_8 );
+y_low_q = _mm256_sub_epi16( y_low_q, offset_8 );
+
+/* Compute products of int16_t integers, add pairwise, store as int32_t */
+__m256i xy_high_q = _mm256_madd_epi16( x_high_q, y_high_q );
+__m256i xy_low_q = _mm256_madd_epi16( x_low_q, y_low_q );
+
+/* Accumulate the products of int32_t integers -> we now have a vector of 8 int_32t */
+__m256i xy_q = _mm256_add_epi32( xy_high_q, xy_low_q );
+
+/* Convert to vectore of 8 int32_t to 8 floats */
+__m256 q = _mm256_cvtepi32_ps( xy_q );
+
+/* Multiply q with scale and accumulate */
+acc = _mm256_fmadd_ps( scale, q, acc );
+}
+
+}
 
 // Return horizontal sum of the acc vector
 __m128 res = _mm256_extractf128_ps( acc, 1 );
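As a reading aid for the hunk above: the new AVX2 path no longer uses bytesFromNibbles plus the signed-byte maddubs trick; it widens each 4-bit quant into 16-bit lanes, offsets the values into [-8, 7], multiplies with _mm256_madd_epi16, and unrolls eight blocks per iteration. The following scalar sketch shows the per-block quantity the vector loop accumulates; it assumes the q4_0 block layout used around this revision (a float scale d followed by 16 bytes packing 32 nibbles) and is illustrative only, not part of the diff.

    #include <stdint.h>

    #define QK 32                     /* values per q4_0 block (assumed layout) */

    typedef struct {
        float   d;                    /* block scale */
        uint8_t qs[QK / 2];           /* 32 packed 4-bit values, two per byte */
    } block_q4_0;

    /* Scalar reference for one pair of blocks: the value each unrolled
     * AVX2 iteration adds into the acc vector. */
    static float q4_0_block_dot(const block_q4_0 * x, const block_q4_0 * y) {
        int sum = 0;
        for (int j = 0; j < QK / 2; j++) {
            const int x0 = (x->qs[j] & 0x0F) - 8;  /* low nibble,  offset to [-8, 7] */
            const int x1 = (x->qs[j] >> 4)   - 8;  /* high nibble, offset to [-8, 7] */
            const int y0 = (y->qs[j] & 0x0F) - 8;
            const int y1 = (y->qs[j] >> 4)   - 8;
            sum += x0*y0 + x1*y1;
        }
        return x->d * y->d * (float) sum;          /* combined scale applied once per block */
    }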
@@ -2025,7 +2055,7 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
 bx = _mm_sub_epi8( bx, off );
 by = _mm_sub_epi8( by, off );
 
-
+// Get absolute values of x vectors
 const __m128i ax = _mm_sign_epi8(bx, bx);
 
 // Sign the values of the y vectors
@@ -2774,7 +2804,7 @@ void ggml_print_objects(const struct ggml_context * ctx) {
 GGML_PRINT("%s: --- end ---\n", __func__);
 }
 
-
+int64_t ggml_nelements(const struct ggml_tensor * tensor) {
 static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
 return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
@@ -3090,7 +3120,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
 struct ggml_context * ctx,
 enum ggml_type type,
 int n_dims,
-const
+const int64_t* ne,
 void* data) {
 // always insert objects at the end of the context's memory pool
 struct ggml_object * obj_cur = ctx->objects_end;
@@ -3189,7 +3219,8 @@ struct ggml_tensor * ggml_new_tensor_impl(
 /*.pad =*/ { 0 },
 };
 
-
+// TODO: this should not be needed as long as we don't rely on aligned SIMD loads
+//ggml_assert_aligned(result->data);
 
 for (int i = 0; i < n_dims; i++) {
 result->ne[i] = ne[i];
@@ -3210,44 +3241,44 @@ struct ggml_tensor * ggml_new_tensor(
 struct ggml_context * ctx,
 enum ggml_type type,
 int n_dims,
-const
+const int64_t * ne) {
 return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
 }
 
 struct ggml_tensor * ggml_new_tensor_1d(
 struct ggml_context * ctx,
 enum ggml_type type,
-
+int64_t ne0) {
 return ggml_new_tensor(ctx, type, 1, &ne0);
 }
 
 struct ggml_tensor * ggml_new_tensor_2d(
 struct ggml_context * ctx,
 enum ggml_type type,
-
-
-const
+int64_t ne0,
+int64_t ne1) {
+const int64_t ne[2] = { ne0, ne1 };
 return ggml_new_tensor(ctx, type, 2, ne);
 }
 
 struct ggml_tensor * ggml_new_tensor_3d(
 struct ggml_context * ctx,
 enum ggml_type type,
-
-
-
-const
+int64_t ne0,
+int64_t ne1,
+int64_t ne2) {
+const int64_t ne[3] = { ne0, ne1, ne2 };
 return ggml_new_tensor(ctx, type, 3, ne);
 }
 
 struct ggml_tensor * ggml_new_tensor_4d(
 struct ggml_context * ctx,
 enum ggml_type type,
-
-
-
-
-const
+int64_t ne0,
+int64_t ne1,
+int64_t ne2,
+int64_t ne3) {
+const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
 return ggml_new_tensor(ctx, type, 4, ne);
 }
 
@@ -3590,7 +3621,14 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
 struct ggml_tensor * ggml_view_tensor(
 struct ggml_context * ctx,
 const struct ggml_tensor * src) {
-
+struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data);
+
+result->nb[0] = src->nb[0];
+result->nb[1] = src->nb[1];
+result->nb[2] = src->nb[2];
+result->nb[3] = src->nb[3];
+
+return result;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -3894,7 +3932,7 @@ struct ggml_tensor * ggml_mean(
 is_node = true;
 }
 
-
+int64_t ne[GGML_MAX_DIMS] = { 1, a->ne[1], a->ne[2], a->ne[3] };
 struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, a->n_dims, ne);
 
 result->op = GGML_OP_MEAN;
@@ -4255,7 +4293,7 @@ struct ggml_tensor * ggml_mul_mat(
 is_node = true;
 }
 
-const
+const int64_t ne[4] = { a->ne[1], b->ne[1], a->ne[2], b->ne[3] };
 struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, MIN(a->n_dims, b->n_dims), ne);
 
 result->op = GGML_OP_MUL_MAT;
@@ -4380,8 +4418,8 @@ struct ggml_tensor * ggml_reshape(
 struct ggml_tensor * ggml_reshape_2d(
 struct ggml_context * ctx,
 struct ggml_tensor * a,
-
-
+int64_t ne0,
+int64_t ne1) {
 GGML_ASSERT(ggml_is_contiguous(a));
 GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
 
@@ -4392,7 +4430,7 @@ struct ggml_tensor * ggml_reshape_2d(
 is_node = true;
 }
 
-const
+const int64_t ne[2] = { ne0, ne1 };
 struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data);
 
 result->op = GGML_OP_RESHAPE;
@@ -4406,9 +4444,9 @@ struct ggml_tensor * ggml_reshape_2d(
 struct ggml_tensor * ggml_reshape_3d(
 struct ggml_context * ctx,
 struct ggml_tensor * a,
-
-
-
+int64_t ne0,
+int64_t ne1,
+int64_t ne2) {
 GGML_ASSERT(ggml_is_contiguous(a));
 GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
 
@@ -4419,7 +4457,7 @@ struct ggml_tensor * ggml_reshape_3d(
 is_node = true;
 }
 
-const
+const int64_t ne[3] = { ne0, ne1, ne2 };
 struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data);
 
 result->op = GGML_OP_RESHAPE;
@@ -4435,7 +4473,7 @@ struct ggml_tensor * ggml_reshape_3d(
 struct ggml_tensor * ggml_view_1d(
 struct ggml_context * ctx,
 struct ggml_tensor * a,
-
+int64_t ne0,
 size_t offset) {
 if (a->grad) {
 GGML_ASSERT(false); // gradient propagation is not supported
@@ -4456,15 +4494,15 @@ struct ggml_tensor * ggml_view_1d(
 struct ggml_tensor * ggml_view_2d(
 struct ggml_context * ctx,
 struct ggml_tensor * a,
-
-
+int64_t ne0,
+int64_t ne1,
 size_t nb1,
 size_t offset) {
 if (a->grad) {
 GGML_ASSERT(false); // gradient propagation is not supported
 }
 
-const
+const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
 
 struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
 
@@ -4480,6 +4518,37 @@ struct ggml_tensor * ggml_view_2d(
 return result;
 }
 
+// ggml_view_3d
+
+struct ggml_tensor * ggml_view_3d(
+struct ggml_context * ctx,
+struct ggml_tensor * a,
+int64_t ne0,
+int64_t ne1,
+int64_t ne2,
+size_t nb1,
+size_t nb2,
+size_t offset) {
+if (a->grad) {
+GGML_ASSERT(false); // gradient propagation is not supported
+}
+
+const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
+
+struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
+
+result->nb[1] = nb1;
+result->nb[2] = nb2;
+result->nb[3] = result->nb[2]*ne2;
+
+result->op = GGML_OP_VIEW;
+result->grad = NULL;
+result->src0 = a;
+result->src1 = NULL; // TODO: maybe store the offset here?
+
+return result;
+}
+
 // ggml_permute
 
 struct ggml_tensor * ggml_permute(
@@ -4695,7 +4764,7 @@ struct ggml_tensor * ggml_conv_1d_1s(
 is_node = true;
 }
 
-const
+const int64_t ne[4] = { b->ne[0], a->ne[2], 1, 1, };
 struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
 
 result->op = GGML_OP_CONV_1D_1S;
@@ -4722,7 +4791,7 @@ struct ggml_tensor * ggml_conv_1d_2s(
 is_node = true;
 }
 
-const
+const int64_t ne[4] = { b->ne[0]/2, a->ne[2], 1, 1, };
 struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
 
 result->op = GGML_OP_CONV_1D_2S;
@@ -4815,102 +4884,112 @@ static void ggml_compute_forward_dup_f16(
 const struct ggml_tensor * src0,
 struct ggml_tensor * dst) {
 GGML_ASSERT(params->ith == 0);
-GGML_ASSERT(ggml_is_contiguous(dst));
 GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
 if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
 return;
 }
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb00 = src0->nb[0];
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
 const size_t nb03 = src0->nb[3];
 
-
+const size_t nb0 = dst->nb[0];
+const size_t nb1 = dst->nb[1];
+const size_t nb2 = dst->nb[2];
+const size_t nb3 = dst->nb[3];
+
+if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
 memcpy(dst->data, src0->data, ggml_nelements(dst) * GGML_TYPE_SIZE[src0->type]);
 return;
 }
 
-if (src0->
-
-
-
-
-
-
-
-
-
-
-
-
-id++;
-}
-}
-}
-} else if (dst->type == GGML_TYPE_F32) {
-size_t id = 0;
-float * dst_ptr = (float *) dst->data;
-
-for (int i03 = 0; i03 < ne03; i03++) {
-for (int i02 = 0; i02 < ne02; i02++) {
-for (int i01 = 0; i01 < ne01; i01++) {
-for (int i00 = 0; i00 < ne00; i00++) {
-const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-
-dst_ptr[id] = GGML_FP16_TO_FP32(*src0_ptr);
-id++;
-}
-}
+if (src0->type == dst->type &&
+src0->ne[0] == dst->ne[0] &&
+src0->nb[0] == GGML_TYPE_SIZE[src0->type] && dst->nb[0] == GGML_TYPE_SIZE[dst->type]) {
+// copy by rows
+const size_t rs = ne00*nb00;
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
+memcpy(
+((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3),
+((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03),
+rs);
 }
 }
-} else {
-GGML_ASSERT(false); // TODO: implement
 }
-
-
-
-if (dst->type == GGML_TYPE_F32) {
-size_t id = 0;
-float * dst_ptr = (float *) dst->data;
-
-for (int i03 = 0; i03 < ne03; i03++) {
-for (int i02 = 0; i02 < ne02; i02++) {
-for (int i01 = 0; i01 < ne01; i01++) {
-for (int i00 = 0; i00 < ne00; i00++) {
-const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+return;
+}
 
-
-
+// TODO: add more special-case implementations for tensor shapes/strides that can benefit from memcpy
+
+// dst counters
+int64_t i10 = 0;
+int64_t i11 = 0;
+int64_t i12 = 0;
+int64_t i13 = 0;
+
+if (dst->type == GGML_TYPE_F16) {
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
+for (int64_t i00 = 0; i00 < ne00; i00++) {
+const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+memcpy(dst_ptr, src0_ptr, sizeof(ggml_fp16_t));
+
+if (++i10 == ne00) {
+i10 = 0;
+if (++i11 == ne01) {
+i11 = 0;
+if (++i12 == ne02) {
+i12 = 0;
+if (++i13 == ne03) {
+i13 = 0;
+}
+}
+}
 }
 }
 }
 }
-}
-
-
-
-
-
-
-
-
-
-
-
+}
+} else if (dst->type == GGML_TYPE_F32) {
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
+for (int64_t i00 = 0; i00 < ne00; i00++) {
+const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+*(float *) dst_ptr = GGML_FP16_TO_FP32(*(const ggml_fp16_t *) src0_ptr);
+
+if (++i10 == ne00) {
+i10 = 0;
+if (++i11 == ne01) {
+i11 = 0;
+if (++i12 == ne02) {
+i12 = 0;
+if (++i13 == ne03) {
+i13 = 0;
+}
+}
+}
 }
 }
 }
 }
-} else {
-GGML_ASSERT(false); // TODO: implement
 }
+} else {
+GGML_ASSERT(false); // TODO: implement
 }
 }
 
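The rewritten dup kernel above drops the requirement that dst be contiguous: it keeps whole-buffer and row-wise memcpy fast paths, and otherwise walks the source element by element while tracking the destination position with explicit counters (i10..i13) that carry like an odometer. A standalone sketch of that carry pattern, with illustrative names that are not part of the ggml API:

    #include <stdint.h>

    /* Advance a 4-d destination index by one element, wrapping each dimension
     * at its extent ne[] and carrying into the next one (innermost first). */
    static void advance_index(int64_t i[4], const int64_t ne[4]) {
        if (++i[0] == ne[0]) {
            i[0] = 0;
            if (++i[1] == ne[1]) {
                i[1] = 0;
                if (++i[2] == ne[2]) {
                    i[2] = 0;
                    if (++i[3] == ne[3]) {
                        i[3] = 0;
                    }
                }
            }
        }
    }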
@@ -4919,102 +4998,92 @@ static void ggml_compute_forward_dup_f32(
 const struct ggml_tensor * src0,
 struct ggml_tensor * dst) {
 GGML_ASSERT(params->ith == 0);
-GGML_ASSERT(ggml_is_contiguous(dst));
 GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
 
 if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
 return;
 }
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb00 = src0->nb[0];
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
 const size_t nb03 = src0->nb[3];
 
-
+const size_t nb0 = dst->nb[0];
+const size_t nb1 = dst->nb[1];
+const size_t nb2 = dst->nb[2];
+const size_t nb3 = dst->nb[3];
+
+if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
 memcpy(dst->data, src0->data, ggml_nelements(dst) * GGML_TYPE_SIZE[src0->type]);
 return;
 }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-dst_ptr[id] = GGML_FP32_TO_FP16(*src0_ptr);
-id++;
+// dst counters
+int64_t i10 = 0;
+int64_t i11 = 0;
+int64_t i12 = 0;
+int64_t i13 = 0;
+
+if (dst->type == GGML_TYPE_F32) {
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
+for (int64_t i00 = 0; i00 < ne00; i00++) {
+const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+memcpy(dst_ptr, src0_ptr, sizeof(float));
+
+if (++i10 == dst->ne[0]) {
+i10 = 0;
+if (++i11 == dst->ne[1]) {
+i11 = 0;
+if (++i12 == dst->ne[2]) {
+i12 = 0;
+if (++i13 == dst->ne[3]) {
+i13 = 0;
+}
+}
+}
 }
 }
 }
 }
-} else {
-GGML_ASSERT(false); // TODO: implement
 }
-} else {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-ggml_fp16_t * dst_ptr = (ggml_fp16_t *) dst->data;
-
-for (int i03 = 0; i03 < ne03; i03++) {
-for (int i02 = 0; i02 < ne02; i02++) {
-for (int i01 = 0; i01 < ne01; i01++) {
-for (int i00 = 0; i00 < ne00; i00++) {
-const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
-
-dst_ptr[id] = GGML_FP32_TO_FP16(*src0_ptr);
-id++;
+} else if (dst->type == GGML_TYPE_F16) {
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
+for (int64_t i00 = 0; i00 < ne00; i00++) {
+const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03);
+char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3);
+
+*(ggml_fp16_t *) dst_ptr = GGML_FP32_TO_FP16(*(const float *) src0_ptr);
+
+if (++i10 == dst->ne[0]) {
+i10 = 0;
+if (++i11 == dst->ne[1]) {
+i11 = 0;
+if (++i12 == dst->ne[2]) {
+i12 = 0;
+if (++i13 == dst->ne[3]) {
+i13 = 0;
+}
+}
+}
 }
 }
 }
 }
-} else {
-GGML_ASSERT(false); // TODO: implement
 }
+} else {
+GGML_ASSERT(false); // TODO: implement
 }
 }
 
@@ -5389,18 +5458,18 @@ static void ggml_compute_forward_sum_f32(
 assert(ggml_is_scalar(dst));
 assert(src0->nb[0] == sizeof(float));
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
 const size_t nb03 = src0->nb[3];
 
-for (
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 ggml_vec_sum_f32(ne00,
 (float *) (dst->data),
 (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
@@ -5445,19 +5514,19 @@ static void ggml_compute_forward_mean_f32(
 
 assert(src0->nb[0] == sizeof(float));
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
 const size_t nb03 = src0->nb[3];
 
-const
-const
-const
-const
+const int64_t ne0 = dst->ne[0];
+const int64_t ne1 = dst->ne[1];
+const int64_t ne2 = dst->ne[2];
+const int64_t ne3 = dst->ne[3];
 
 assert(ne0 == 1);
 assert(ne1 == ne01);
@@ -5473,9 +5542,9 @@ static void ggml_compute_forward_mean_f32(
 const size_t nb2 = dst->nb[2];
 const size_t nb3 = dst->nb[3];
 
-for (
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 ggml_vec_sum_f32(ne00,
 (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3),
 (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03));
@@ -5962,10 +6031,10 @@ static void ggml_compute_forward_norm_f32(
 const int ith = params->ith;
 const int nth = params->nth;
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
@@ -5978,13 +6047,13 @@ static void ggml_compute_forward_norm_f32(
 const float eps = 1e-5f; // TODO: make this a parameter
 
 // TODO: optimize
-for (
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = ith; i01 < ne01; i01 += nth) {
 const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
 
 ggml_float sum = 0.0;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 sum += (ggml_float)x[i00];
 }
 
@@ -5993,7 +6062,7 @@ static void ggml_compute_forward_norm_f32(
 float * y = (float *) ((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3);
 
 ggml_float sum2 = 0.0;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 float v = x[i00] - mean;
 y[i00] = v;
 sum2 += (ggml_float)(v*v);
@@ -6045,10 +6114,10 @@ static void ggml_compute_forward_rms_norm_f32(
 const int ith = params->ith;
 const int nth = params->nth;
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 const size_t nb01 = src0->nb[1];
 const size_t nb02 = src0->nb[2];
@@ -6061,13 +6130,13 @@ static void ggml_compute_forward_rms_norm_f32(
 const float eps = 1e-6f; // TODO: make this a parameter
 
 // TODO: optimize
-for (
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = ith; i01 < ne01; i01 += nth) {
 const float * x = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03);
 
 ggml_float sum = 0.0;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 sum += (ggml_float)(x[i00] * x[i00]);
 }
 
@@ -6120,13 +6189,13 @@ static bool ggml_compute_forward_mul_mat_use_blas(
 const struct ggml_tensor * src0,
 const struct ggml_tensor * src1,
 struct ggml_tensor * dst) {
-//const
-//const
+//const int64_t ne00 = src0->ne[0];
+//const int64_t ne01 = src0->ne[1];
 
-const
+const int64_t ne10 = src1->ne[0];
 
-const
-const
+const int64_t ne0 = dst->ne[0];
+const int64_t ne1 = dst->ne[1];
 
 // TODO: find the optimal values for these
 if (ggml_is_contiguous(src0) &&
@@ -6148,23 +6217,23 @@ static void ggml_compute_forward_mul_mat_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
-const
+const int64_t ne10 = src1->ne[0];
 #endif
-const
+const int64_t ne11 = src1->ne[1];
 #ifndef NDEBUG
-const
-const
+const int64_t ne12 = src1->ne[2];
+const int64_t ne13 = src1->ne[3];
 
-const
-const
-const
-const
+const int64_t ne0 = dst->ne[0];
+const int64_t ne1 = dst->ne[1];
+const int64_t ne2 = dst->ne[2];
+const int64_t ne3 = dst->ne[3];
 
 const int nb00 = src0->nb[0];
 #endif
@@ -6224,8 +6293,8 @@ static void ggml_compute_forward_mul_mat_f32(
 return;
 }
 
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
 const float * x = (float *) ((char *) src0->data + i02*nb02 + i03*nb03);
 const float * y = (float *) ((char *) src1->data + i02*nb12 + i03*nb13);
 
@@ -6272,7 +6341,7 @@ static void ggml_compute_forward_mul_mat_f32(
 const int i02 = (ir - i03*ne02*ne01)/ne01;
 const int i01 = (ir - i03*ne02*ne01 - i02*ne01);
 
-for (
+for (int64_t ic = 0; ic < ne11; ++ic) {
 // src1 indices
 const int i13 = i03;
 const int i12 = i02;
@@ -6313,21 +6382,21 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
-const
-const
-const
-const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+const int64_t ne12 = src1->ne[2];
+const int64_t ne13 = src1->ne[3];
 
-const
-const
-const
-const
-//const
+const int64_t ne0 = dst->ne[0];
+const int64_t ne1 = dst->ne[1];
+const int64_t ne2 = dst->ne[2];
+const int64_t ne3 = dst->ne[3];
+//const int64_t ne = ne0*ne1*ne2*ne3;
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -6387,12 +6456,12 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 
 float * const wdata = params->wdata;
 
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
 {
 size_t id = 0;
-for (
-for (
+for (int64_t i01 = 0; i01 < ne01; ++i01) {
+for (int64_t i00 = 0; i00 < ne00; ++i00) {
 wdata[id++] = GGML_FP16_TO_FP32(*(ggml_fp16_t *) ((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00));
 }
 }
@@ -6422,10 +6491,10 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 ggml_fp16_t * const wdata = params->wdata;
 
 size_t id = 0;
-for (
-for (
-for (
-for (
+for (int64_t i13 = 0; i13 < ne13; ++i13) {
+for (int64_t i12 = 0; i12 < ne12; ++i12) {
+for (int64_t i11 = 0; i11 < ne11; ++i11) {
+for (int64_t i10 = 0; i10 < ne10; ++i10) {
 wdata[id++] = GGML_FP32_TO_FP16(*(float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11 + i10*nb10));
 }
 }
@@ -6477,7 +6546,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
 
 float * dst_col = (float *) ((char *) dst->data + (i0*nb0 + 0*nb1 + i2*nb2 + i3*nb3));
 
-for (
+for (int64_t ic = 0; ic < ne11; ++ic) {
 ggml_vec_dot_f16(ne00, &dst_col[ic*ne0], src0_row, src1_col + ic*ne00);
 }
 }
@@ -6526,20 +6595,20 @@ static void ggml_compute_forward_mul_mat_q_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+const int64_t ne03 = src0->ne[3];
 
-const
-const
-const
-const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+const int64_t ne12 = src1->ne[2];
+const int64_t ne13 = src1->ne[3];
 
-const
-const
-const
-const
+const int64_t ne0 = dst->ne[0];
+const int64_t ne1 = dst->ne[1];
+const int64_t ne2 = dst->ne[2];
+const int64_t ne3 = dst->ne[3];
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -6603,11 +6672,11 @@ static void ggml_compute_forward_mul_mat_q_f32(
 float * const wdata = params->wdata;
 dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q;
 
-for (
-for (
+for (int64_t i03 = 0; i03 < ne03; i03++) {
+for (int64_t i02 = 0; i02 < ne02; i02++) {
 {
 size_t id = 0;
-for (
+for (int64_t i01 = 0; i01 < ne01; ++i01) {
 dequantize_row_q((char *) src0->data + i03*nb03 + i02*nb02 + i01*nb01, wdata + id, ne00);
 id += ne00;
 }
@@ -6637,9 +6706,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
 char * wdata = params->wdata;
 const size_t row_size = ne10*GGML_TYPE_SIZE[type]/GGML_BLCK_SIZE[type];
 
-for (
-for (
-for (
+for (int64_t i13 = 0; i13 < ne13; ++i13) {
+for (int64_t i12 = 0; i12 < ne12; ++i12) {
+for (int64_t i11 = 0; i11 < ne11; ++i11) {
 quantize_row_q((float *)((char *) src1->data + i13*nb13 + i12*nb12 + i11*nb11), (void *) wdata, ne10);
 wdata += row_size;
 }
@@ -6688,7 +6757,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
 
 assert(ne00 % 32 == 0);
 
-for (
+for (int64_t ic = 0; ic < ne11; ++ic) {
 vec_dot_q(ne00, &dst_col[ic*ne0], src0_row, (void *) (src1_col + ic*row_size));
 }
 }
@@ -7169,7 +7238,6 @@ static void ggml_compute_forward_rope_f32(
 const struct ggml_tensor * src0,
 const struct ggml_tensor * src1,
 struct ggml_tensor * dst) {
-assert(params->ith == 0);
 assert(src1->type == GGML_TYPE_I32);
 assert(ggml_nelements(src1) == 3);
 
@@ -7181,10 +7249,10 @@ static void ggml_compute_forward_rope_f32(
 const int n_dims = ((int32_t *) src1->data)[1];
 const int mode = ((int32_t *) src1->data)[2];
 
-//const
-const
-const
-const
+//const int64_t ne0 = src0->ne[0];
+const int64_t ne1 = src0->ne[1];
+const int64_t ne2 = src0->ne[2];
+const int64_t ne3 = src0->ne[3];
 
 const int nb0 = src0->nb[0];
 const int nb1 = src0->nb[1];
@@ -7196,11 +7264,28 @@ static void ggml_compute_forward_rope_f32(
 
 assert(nb0 == sizeof(float));
 
-
-
-
+const int ith = params->ith;
+const int nth = params->nth;
+
+const int nr = ggml_nrows(src0);
+
+// rows per thread
+const int dr = (nr + nth - 1)/nth;
+
+// row range for this thread
+const int ir0 = dr*ith;
+const int ir1 = MIN(ir0 + dr, nr);
+
+// row index used to determine which thread to use
+int ir = 0;
+
+for (int64_t i3 = 0; i3 < ne3; i3++) {
+for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
 const int p = (mode == 0 ? n_past + i2 : i2);
-for (
+for (int64_t i1 = 0; i1 < ne1; i1++) {
+if (ir++ < ir0) continue;
+if (ir > ir1) break;
+
 for (int i0 = 0; i0 < n_dims; i0 += 2) {
 const float theta = powf(10000.0, ((float)-i0)/n_dims);
 
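The rope hunk above (and its f16 counterpart in the next hunk) removes the single-thread assert and instead splits the work by rows: thread ith of nth handles the half-open row range [ir0, ir1). A small sketch of that partition arithmetic, with illustrative names that are not part of the ggml API:

    /* Split nr rows into nth nearly equal chunks; thread ith gets [ *ir0, *ir1 ). */
    static void thread_row_range(int nr, int nth, int ith, int * ir0, int * ir1) {
        const int dr = (nr + nth - 1) / nth;       /* rows per thread, rounded up */
        *ir0 = dr * ith;
        *ir1 = (*ir0 + dr < nr) ? *ir0 + dr : nr;  /* MIN(ir0 + dr, nr) */
    }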
@@ -7226,7 +7311,6 @@ static void ggml_compute_forward_rope_f16(
 const struct ggml_tensor * src0,
 const struct ggml_tensor * src1,
 struct ggml_tensor * dst) {
-assert(params->ith == 0);
 assert(src1->type == GGML_TYPE_I32);
 assert(ggml_nelements(src1) == 3);
 
@@ -7238,10 +7322,10 @@ static void ggml_compute_forward_rope_f16(
 const int n_dims = ((int32_t *) src1->data)[1];
 const int mode = ((int32_t *) src1->data)[2];
 
-//const
-const
-const
-const
+//const int64_t ne0 = src0->ne[0];
+const int64_t ne1 = src0->ne[1];
+const int64_t ne2 = src0->ne[2];
+const int64_t ne3 = src0->ne[3];
 
 const int nb0 = src0->nb[0];
 const int nb1 = src0->nb[1];
@@ -7253,10 +7337,28 @@ static void ggml_compute_forward_rope_f16(
 
 assert(nb0 == sizeof(ggml_fp16_t));
 
-
-
+const int ith = params->ith;
+const int nth = params->nth;
+
+const int nr = ggml_nrows(src0);
+
+// rows per thread
+const int dr = (nr + nth - 1)/nth;
+
+// row range for this thread
+const int ir0 = dr*ith;
+const int ir1 = MIN(ir0 + dr, nr);
+
+// row index used to determine which thread to use
+int ir = 0;
+
+for (int64_t i3 = 0; i3 < ne3; i3++) {
+for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) {
 const int p = (mode == 0 ? n_past + i2 : i2);
-for (
+for (int64_t i1 = 0; i1 < ne1; i1++) {
+if (ir++ < ir0) continue;
+if (ir > ir1) break;
+
 for (int i0 = 0; i0 < n_dims; i0 += 2) {
 const float theta = powf(10000.0, ((float)-i0)/n_dims);
 
@@ -7317,21 +7419,21 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-//const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+//const int64_t ne03 = src0->ne[3];
 
-const
-const
-//const
-//const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+//const int64_t ne12 = src1->ne[2];
+//const int64_t ne13 = src1->ne[3];
 
-//const
-//const
-//const
-//const
-//const
+//const int64_t ne0 = dst->ne[0];
+//const int64_t ne1 = dst->ne[1];
+//const int64_t ne2 = dst->ne[2];
+//const int64_t ne3 = dst->ne[3];
+//const int64_t ne = ne0*ne1*ne2*ne3;
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -7368,11 +7470,11 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
 {
 ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
 
-for (
-for (
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
 ggml_fp16_t * dst_data = wdata + i02*ew0*ne00;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 dst_data[i00*ew0 + i01] = src[i00];
 }
 }
@@ -7383,10 +7485,10 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
 {
 ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00;
 
-for (
+for (int64_t i11 = 0; i11 < ne11; i11++) {
 const float * const src = (float *)((char *) src1->data + i11*nb11);
 ggml_fp16_t * dst_data = wdata;
-for (
+for (int64_t i10 = 0; i10 < ne10; i10++) {
 dst_data[(i10 + nh)*ew0 + i11] = GGML_FP32_TO_FP16(src[i10]);
 }
 }
@@ -7411,7 +7513,7 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
 
 for (int i1 = ir0; i1 < ir1; i1++) {
 float * dst_data = (float *)((char *) dst->data + i1*nb1);
-for (
+for (int64_t i0 = 0; i0 < ne10; ++i0) {
 dst_data[i0] = 0;
 for (int k = -nh; k <= nh; k++) {
 float v = 0.0f;
@@ -7437,21 +7539,21 @@ static void ggml_compute_forward_conv_1d_1s_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-//const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+//const int64_t ne03 = src0->ne[3];
 
-const
-const
-//const
-//const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+//const int64_t ne12 = src1->ne[2];
+//const int64_t ne13 = src1->ne[3];
 
-//const
-//const
-//const
-//const
-//const
+//const int64_t ne0 = dst->ne[0];
+//const int64_t ne1 = dst->ne[1];
+//const int64_t ne2 = dst->ne[2];
+//const int64_t ne3 = dst->ne[3];
+//const int64_t ne = ne0*ne1*ne2*ne3;
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -7488,11 +7590,11 @@ static void ggml_compute_forward_conv_1d_1s_f32(
 {
 float * const wdata = (float *) params->wdata + 0;
 
-for (
-for (
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01);
 float * dst_data = wdata + i02*ew0*ne00;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 dst_data[i00*ew0 + i01] = src[i00];
 }
 }
@@ -7503,10 +7605,10 @@ static void ggml_compute_forward_conv_1d_1s_f32(
 {
 float * const wdata = (float *) params->wdata + ne02*ew0*ne00;
 
-for (
+for (int64_t i11 = 0; i11 < ne11; i11++) {
 const float * const src = (float *)((char *) src1->data + i11*nb11);
 float * dst_data = wdata;
-for (
+for (int64_t i10 = 0; i10 < ne10; i10++) {
 dst_data[(i10 + nh)*ew0 + i11] = src[i10];
 }
 }
@@ -7531,7 +7633,7 @@ static void ggml_compute_forward_conv_1d_1s_f32(
 
 for (int i1 = ir0; i1 < ir1; i1++) {
 float * dst_data = (float *)((char *) dst->data + i1*nb1);
-for (
+for (int64_t i0 = 0; i0 < ne10; ++i0) {
 dst_data[i0] = 0;
 for (int k = -nh; k <= nh; k++) {
 float v = 0.0f;
@@ -7585,21 +7687,21 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-//const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+//const int64_t ne03 = src0->ne[3];
 
-const
-const
-//const
-//const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+//const int64_t ne12 = src1->ne[2];
+//const int64_t ne13 = src1->ne[3];
 
-//const
-//const
-//const
-//const
-//const
+//const int64_t ne0 = dst->ne[0];
+//const int64_t ne1 = dst->ne[1];
+//const int64_t ne2 = dst->ne[2];
+//const int64_t ne3 = dst->ne[3];
+//const int64_t ne = ne0*ne1*ne2*ne3;
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -7636,11 +7738,11 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
 {
 ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
 
-for (
-for (
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i02*nb02 + i01*nb01);
 ggml_fp16_t * dst_data = wdata + i02*ew0*ne00;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 dst_data[i00*ew0 + i01] = src[i00];
 }
 }
@@ -7651,10 +7753,10 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
 {
 ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + ne02*ew0*ne00;
 
-for (
+for (int64_t i11 = 0; i11 < ne11; i11++) {
 const float * const src = (float *)((char *) src1->data + i11*nb11);
 ggml_fp16_t * dst_data = wdata;
-for (
+for (int64_t i10 = 0; i10 < ne10; i10++) {
 dst_data[(i10 + nh)*ew0 + i11] = GGML_FP32_TO_FP16(src[i10]);
 }
 }
@@ -7679,7 +7781,7 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
 
 for (int i1 = ir0; i1 < ir1; i1++) {
 float * dst_data = (float *)((char *) dst->data + i1*nb1);
-for (
+for (int64_t i0 = 0; i0 < ne10; i0 += 2) {
 dst_data[i0/2] = 0;
 for (int k = -nh; k <= nh; k++) {
 float v = 0.0f;
@@ -7705,21 +7807,21 @@ static void ggml_compute_forward_conv_1d_2s_f32(
 int64_t t0 = ggml_perf_time_us();
 UNUSED(t0);
 
-const
-const
-const
-//const
+const int64_t ne00 = src0->ne[0];
+const int64_t ne01 = src0->ne[1];
+const int64_t ne02 = src0->ne[2];
+//const int64_t ne03 = src0->ne[3];
 
-const
-const
-//const
-//const
+const int64_t ne10 = src1->ne[0];
+const int64_t ne11 = src1->ne[1];
+//const int64_t ne12 = src1->ne[2];
+//const int64_t ne13 = src1->ne[3];
 
-//const
-//const
-//const
-//const
-//const
+//const int64_t ne0 = dst->ne[0];
+//const int64_t ne1 = dst->ne[1];
+//const int64_t ne2 = dst->ne[2];
+//const int64_t ne3 = dst->ne[3];
+//const int64_t ne = ne0*ne1*ne2*ne3;
 
 const int nb00 = src0->nb[0];
 const int nb01 = src0->nb[1];
@@ -7756,11 +7858,11 @@ static void ggml_compute_forward_conv_1d_2s_f32(
 {
 float * const wdata = (float *) params->wdata + 0;
 
-for (
-for (
+for (int64_t i02 = 0; i02 < ne02; i02++) {
+for (int64_t i01 = 0; i01 < ne01; i01++) {
 const float * const src = (float *)((char *) src0->data + i02*nb02 + i01*nb01);
 float * dst_data = wdata + i02*ew0*ne00;
-for (
+for (int64_t i00 = 0; i00 < ne00; i00++) {
 dst_data[i00*ew0 + i01] = src[i00];
 }
 }
@@ -7771,10 +7873,10 @@ static void ggml_compute_forward_conv_1d_2s_f32(
 {
 float * const wdata = (float *) params->wdata + ne02*ew0*ne00;
 
-for (
+for (int64_t i11 = 0; i11 < ne11; i11++) {
 const float * const src = (float *)((char *) src1->data + i11*nb11);
 float * dst_data = wdata;
-for (
+for (int64_t i10 = 0; i10 < ne10; i10++) {
 dst_data[(i10 + nh)*ew0 + i11] = src[i10];
 }
 }
@@ -7799,7 +7901,7 @@ static void ggml_compute_forward_conv_1d_2s_f32(
 
 for (int i1 = ir0; i1 < ir1; i1++) {
 float * dst_data = (float *)((char *) dst->data + i1*nb1);
-for (
+for (int64_t i0 = 0; i0 < ne10; i0 += 2) {
 dst_data[i0/2] = 0;
 for (int k = -nh; k <= nh; k++) {
 float v = 0.0f;
@@ -7851,25 +7953,25 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
7851
7953
|
int64_t t0 = ggml_perf_time_us();
|
7852
7954
|
UNUSED(t0);
|
7853
7955
|
|
7854
|
-
const
|
7855
|
-
const
|
7856
|
-
const
|
7857
|
-
const
|
7956
|
+
const int64_t neq0 = q->ne[0];
|
7957
|
+
const int64_t neq1 = q->ne[1];
|
7958
|
+
const int64_t neq2 = q->ne[2];
|
7959
|
+
const int64_t neq3 = q->ne[3];
|
7858
7960
|
|
7859
|
-
const
|
7860
|
-
const
|
7861
|
-
//const
|
7862
|
-
//const
|
7961
|
+
const int64_t nek0 = k->ne[0];
|
7962
|
+
const int64_t nek1 = k->ne[1];
|
7963
|
+
//const int64_t nek2 = k->ne[2];
|
7964
|
+
//const int64_t nek3 = k->ne[3];
|
7863
7965
|
|
7864
|
-
//const
|
7865
|
-
const
|
7866
|
-
//const
|
7867
|
-
//const
|
7966
|
+
//const int64_t nev0 = v->ne[0];
|
7967
|
+
const int64_t nev1 = v->ne[1];
|
7968
|
+
//const int64_t nev2 = v->ne[2];
|
7969
|
+
//const int64_t nev3 = v->ne[3];
|
7868
7970
|
|
7869
|
-
const
|
7870
|
-
const
|
7871
|
-
//const
|
7872
|
-
//const
|
7971
|
+
const int64_t ne0 = dst->ne[0];
|
7972
|
+
const int64_t ne1 = dst->ne[1];
|
7973
|
+
//const int64_t ne2 = dst->ne[2];
|
7974
|
+
//const int64_t ne3 = dst->ne[3];
|
7873
7975
|
|
7874
7976
|
const int nbk0 = k->nb[0];
|
7875
7977
|
const int nbk1 = k->nb[1];
|
@@ -7894,10 +7996,10 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
7894
7996
|
const int ith = params->ith;
|
7895
7997
|
const int nth = params->nth;
|
7896
7998
|
|
7897
|
-
const
|
7898
|
-
const
|
7899
|
-
const
|
7900
|
-
const
|
7999
|
+
const int64_t D = neq0;
|
8000
|
+
const int64_t N = neq1;
|
8001
|
+
const int64_t P = nek1 - N;
|
8002
|
+
const int64_t M = P + N;
|
7901
8003
|
|
7902
8004
|
const int Mup = ggml_up(M, GGML_SOFT_MAX_UNROLL);
|
7903
8005
|
|
@@ -7959,7 +8061,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
7959
8061
|
S[i] = -INFINITY;
|
7960
8062
|
}
|
7961
8063
|
|
7962
|
-
for (
|
8064
|
+
for (int64_t ic = 0; ic < nek1; ++ic) {
|
7963
8065
|
// k indices
|
7964
8066
|
const int ik3 = iq3;
|
7965
8067
|
const int ik2 = iq2;
|
@@ -7978,7 +8080,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
7978
8080
|
ggml_vec_scale_f32(nek1, S, scale);
|
7979
8081
|
|
7980
8082
|
if (masked) {
|
7981
|
-
for (
|
8083
|
+
for (int64_t i = P; i < M; i++) {
|
7982
8084
|
if (i > P + iq1) {
|
7983
8085
|
S[i] = -INFINITY;
|
7984
8086
|
}
|
@@ -8036,7 +8138,7 @@ static void ggml_compute_forward_flash_attn_f32(
|
|
8036
8138
|
#endif
|
8037
8139
|
}
|
8038
8140
|
|
8039
|
-
for (
|
8141
|
+
for (int64_t ic = 0; ic < nev1; ++ic) {
|
8040
8142
|
// dst indices
|
8041
8143
|
const int i1 = iq1;
|
8042
8144
|
const int i2 = iq2;
|
@@ -8060,25 +8162,25 @@ static void ggml_compute_forward_flash_attn_f16(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    const int neq0 = q->ne[0];
-    const int neq1 = q->ne[1];
-    const int neq2 = q->ne[2];
-    const int neq3 = q->ne[3];
+    const int64_t neq0 = q->ne[0];
+    const int64_t neq1 = q->ne[1];
+    const int64_t neq2 = q->ne[2];
+    const int64_t neq3 = q->ne[3];
 
-    const int nek0 = k->ne[0];
-    const int nek1 = k->ne[1];
-    //const int nek2 = k->ne[2];
-    //const int nek3 = k->ne[3];
+    const int64_t nek0 = k->ne[0];
+    const int64_t nek1 = k->ne[1];
+    //const int64_t nek2 = k->ne[2];
+    //const int64_t nek3 = k->ne[3];
 
-    //const int nev0 = v->ne[0];
-    const int nev1 = v->ne[1];
-    //const int nev2 = v->ne[2];
-    //const int nev3 = v->ne[3];
+    //const int64_t nev0 = v->ne[0];
+    const int64_t nev1 = v->ne[1];
+    //const int64_t nev2 = v->ne[2];
+    //const int64_t nev3 = v->ne[3];
 
-    const int ne0 = dst->ne[0];
-    const int ne1 = dst->ne[1];
-    //const int ne2 = dst->ne[2];
-    //const int ne3 = dst->ne[3];
+    const int64_t ne0 = dst->ne[0];
+    const int64_t ne1 = dst->ne[1];
+    //const int64_t ne2 = dst->ne[2];
+    //const int64_t ne3 = dst->ne[3];
 
     const int nbk0 = k->nb[0];
     const int nbk1 = k->nb[1];
@@ -8103,10 +8205,10 @@ static void ggml_compute_forward_flash_attn_f16(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    const int D = neq0;
-    const int N = neq1;
-    const int P = nek1 - N;
-    const int M = P + N;
+    const int64_t D = neq0;
+    const int64_t N = neq1;
+    const int64_t P = nek1 - N;
+    const int64_t M = P + N;
 
     const int Mup = ggml_up(M, GGML_SOFT_MAX_UNROLL);
 
@@ -8169,7 +8271,7 @@ static void ggml_compute_forward_flash_attn_f16(
             }
 
             if (GGML_VEC_DOT_UNROLL > 2 || nek1 % GGML_VEC_DOT_UNROLL != 0) {
-                for (int ic = 0; ic < nek1; ++ic) {
+                for (int64_t ic = 0; ic < nek1; ++ic) {
                     // k indices
                     const int ik3 = iq3;
                     const int ik2 = iq2;
@@ -8184,7 +8286,7 @@ static void ggml_compute_forward_flash_attn_f16(
                             (ggml_fp16_t *) ((char *) q->data + (iq1*nbq1 + iq2*nbq2 + iq3*nbq3)));
                 }
             } else {
-                for (int ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) {
+                for (int64_t ic = 0; ic < nek1; ic += GGML_VEC_DOT_UNROLL) {
                     // k indices
                     const int ik3 = iq3;
                     const int ik2 = iq2;
@@ -8204,7 +8306,7 @@ static void ggml_compute_forward_flash_attn_f16(
             ggml_vec_scale_f32(nek1, S, scale);
 
             if (masked) {
-                for (int i = P; i < M; i++) {
+                for (int64_t i = P; i < M; i++) {
                     if (i > P + iq1) {
                         S[i] = -INFINITY;
                     }
@@ -8264,12 +8366,12 @@ static void ggml_compute_forward_flash_attn_f16(
 
             ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*Mup + CACHE_LINE_SIZE_F32) + Mup);
 
-            for (int i = 0; i < M; i++) {
+            for (int64_t i = 0; i < M; i++) {
                 S16[i] = GGML_FP32_TO_FP16(S[i]);
             }
 
             if (GGML_VEC_DOT_UNROLL == 1 || (nev1 % GGML_VEC_DOT_UNROLL != 0)) {
-                for (int ic = 0; ic < nev1; ++ic) {
+                for (int64_t ic = 0; ic < nev1; ++ic) {
                     // dst indices
                     const int i1 = iq1;
                     const int i2 = iq2;
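Note the S16 staging step above: the f32 score row is converted once into per-thread scratch so the value accumulation can run through the f16 dot-product kernels. A minimal standalone version of that conversion, assuming ggml.h is on the include path for ggml_fp16_t and GGML_FP32_TO_FP16:

#include <stdint.h>
#include "ggml.h"   // ggml_fp16_t, GGML_FP32_TO_FP16

// Same conversion as the loop above, isolated into a helper.
static void scores_to_f16(const float * S, ggml_fp16_t * S16, int64_t M) {
    for (int64_t i = 0; i < M; i++) {
        S16[i] = GGML_FP32_TO_FP16(S[i]);
    }
}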
@@ -8281,7 +8383,7 @@ static void ggml_compute_forward_flash_attn_f16(
                         S16);
                 }
             } else {
-                for (int ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) {
+                for (int64_t ic = 0; ic < nev1; ic += GGML_VEC_DOT_UNROLL) {
                     // dst indices
                     const int i1 = iq1;
                     const int i2 = iq2;
@@ -8337,35 +8439,35 @@ static void ggml_compute_forward_flash_ff_f16(
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
-    const int nea0 = a->ne[0];
-    const int nea1 = a->ne[1];
-    const int nea2 = a->ne[2];
-    const int nea3 = a->ne[3];
+    const int64_t nea0 = a->ne[0];
+    const int64_t nea1 = a->ne[1];
+    const int64_t nea2 = a->ne[2];
+    const int64_t nea3 = a->ne[3];
 
-    const int neb00 = b0->ne[0];
-    const int neb01 = b0->ne[1];
-    //const int neb02 = b0->ne[2];
-    //const int neb03 = b0->ne[3];
+    const int64_t neb00 = b0->ne[0];
+    const int64_t neb01 = b0->ne[1];
+    //const int64_t neb02 = b0->ne[2];
+    //const int64_t neb03 = b0->ne[3];
 
-    const int neb10 = b1->ne[0];
-    const int neb11 = b1->ne[1];
-    //const int neb12 = b1->ne[2];
-    //const int neb13 = b1->ne[3];
+    const int64_t neb10 = b1->ne[0];
+    const int64_t neb11 = b1->ne[1];
+    //const int64_t neb12 = b1->ne[2];
+    //const int64_t neb13 = b1->ne[3];
 
-    const int nec00 = c0->ne[0];
-    const int nec01 = c0->ne[1];
-    //const int nec02 = c0->ne[2];
-    //const int nec03 = c0->ne[3];
+    const int64_t nec00 = c0->ne[0];
+    const int64_t nec01 = c0->ne[1];
+    //const int64_t nec02 = c0->ne[2];
+    //const int64_t nec03 = c0->ne[3];
 
-    const int nec10 = c1->ne[0];
-    const int nec11 = c1->ne[1];
-    //const int nec12 = c1->ne[2];
-    //const int nec13 = c1->ne[3];
+    const int64_t nec10 = c1->ne[0];
+    const int64_t nec11 = c1->ne[1];
+    //const int64_t nec12 = c1->ne[2];
+    //const int64_t nec13 = c1->ne[3];
 
-    const int ne0 = dst->ne[0];
-    const int ne1 = dst->ne[1];
-    const int ne2 = dst->ne[2];
-    //const int ne3 = dst->ne[3];
+    const int64_t ne0 = dst->ne[0];
+    const int64_t ne1 = dst->ne[1];
+    const int64_t ne2 = dst->ne[2];
+    //const int64_t ne3 = dst->ne[3];
 
     const int nba0 = a->nb[0];
     const int nba1 = a->nb[1];
@@ -8400,9 +8502,9 @@ static void ggml_compute_forward_flash_ff_f16(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    const int D = nea0;
-    //const int N = nea1;
-    const int M = neb01;
+    const int64_t D = nea0;
+    //const int64_t N = nea1;
+    const int64_t M = neb01;
 
     GGML_ASSERT(ne0 == nea0);
     GGML_ASSERT(ne1 == nea1);
@@ -8458,7 +8560,7 @@ static void ggml_compute_forward_flash_ff_f16(
 
         float * S = (float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32);
 
-        for (int ic = 0; ic < neb01; ++ic) {
+        for (int64_t ic = 0; ic < neb01; ++ic) {
            // b0 indices
            const int ib03 = ia3;
            const int ib02 = ia2;
|
 
         ggml_fp16_t * S16 = (ggml_fp16_t *) ((float *) params->wdata + ith*(2*M + CACHE_LINE_SIZE_F32) + M);
 
-        for (int i = 0; i < M; i++) {
+        for (int64_t i = 0; i < M; i++) {
             S16[i] = GGML_FP32_TO_FP16(S[i]);
         }
 
@@ -8490,7 +8592,7 @@ static void ggml_compute_forward_flash_ff_f16(
         const int i2 = ia2;
         const int i3 = ia3;
 
-        for (int ic = 0; ic < nec01; ++ic) {
+        for (int64_t ic = 0; ic < nec01; ++ic) {
 
             ggml_vec_dot_f16(neb01,
                     (float *) ((char *) dst->data + (ic*nb0 + i1*nb1 + i2*nb2 + i3*nb3)),
|
                 } break;
             case GGML_OP_ROPE:
                 {
-                    node->n_tasks =
+                    node->n_tasks = n_threads;
                 } break;
             case GGML_OP_CONV_1D_1S:
             case GGML_OP_CONV_1D_2S:
|
 
                     size_t cur = 0;
 
-                    const int ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL);
+                    const int64_t ne11 = ggml_up(node->src1->ne[1], GGML_SOFT_MAX_UNROLL);
 
                     if (node->src1->type == GGML_TYPE_F32) {
                         cur = sizeof(float)*ne11*node->n_tasks; // TODO: this can become (n_tasks-1)
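The scratch-size estimate above rounds ne11 up with ggml_up(…, GGML_SOFT_MAX_UNROLL) so the soft-max scratch covers whole unroll groups. The helper below sketches what such a round-up does; it is an assumption about ggml_up's behaviour based on how it is used here, not a copy of its implementation.

#include <assert.h>
#include <stdint.h>

// Round x up to the next multiple of m (behaviour assumed above).
static int64_t round_up_to_multiple(int64_t x, int64_t m) {
    assert(m > 0);
    return ((x + m - 1)/m)*m;   // e.g. round_up_to_multiple(7, 4) == 8
}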
@@ -9652,7 +9754,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
 
         perf_total_per_op_us[node->op] += node->perf_time_us;
 
-        GGML_PRINT(" - %3d: [ %
+        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 ", %" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                 i,
                 node->ne[0], node->ne[1], node->ne[2],
                 GGML_OP_LABEL[node->op], node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
@@ -9666,7 +9768,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
     for (int i = 0; i < cgraph->n_leafs; i++) {
         struct ggml_tensor * node = cgraph->leafs[i];
 
-        GGML_PRINT(" - %3d: [ %
+        GGML_PRINT(" - %3d: [ %" PRId64 ", %" PRId64 "] %8s\n",
                 i,
                 node->ne[0], node->ne[1],
                 GGML_OP_LABEL[node->op]);
@@ -9737,7 +9839,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
 
         fprintf(fp, " \"%p\" [ \
 style = filled; fillcolor = %s; shape = record; \
-label=\"%d [%
+label=\"%d [%" PRId64 ", %" PRId64 "] | <x>%s",
                 (void *) node, color,
                 i, node->ne[0], node->ne[1],
                 GGML_OP_SYMBOL[node->op]);
@@ -9762,7 +9864,7 @@ label=\"<x>%.1e\"; ]\n",
         } else {
             fprintf(fp, " \"%p\" [ \
 style = filled; fillcolor = %s; shape = record; \
-label=\"<x>CONST %d [%
+label=\"<x>CONST %d [%" PRId64 ", %" PRId64 "]\"; ]\n",
                     (void *) node, color,
                     i, node->ne[0], node->ne[1]);
         }
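The printing and DOT-dump hunks above all make the same change: once ne[] holds int64_t values, the %d-style conversions in the old format strings (truncated in this diff view) no longer match the argument type, so they are replaced with the PRId64 macros from <inttypes.h>. A minimal standalone example of the portable pattern:

#include <inttypes.h>   // PRId64 expands to the right conversion for int64_t
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const int64_t ne0 = 4096, ne1 = 32000;   // example sizes, not from the diff
    // Passing an int64_t where "%d" expects an int is undefined behaviour on
    // common 32-bit-int platforms; PRId64 keeps format and argument in sync.
    printf("[ %" PRId64 ", %" PRId64 " ]\n", ne0, ne1);
    return 0;
}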
@@ -9826,9 +9928,9 @@ label=\"<x>CONST %d [%d, %d]\"; ]\n",
 static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) {
     int i = 0;
     for (int p = 0; p < np; ++p) {
-        const int ne = ggml_nelements(ps[p]) ;
+        const int64_t ne = ggml_nelements(ps[p]) ;
         // TODO: add function to set tensor from array
-        for (int j = 0; j < ne; ++j) {
+        for (int64_t j = 0; j < ne; ++j) {
             ggml_set_f32_1d(ps[p], j, x[i++]);
         }
     }
@@ -9837,9 +9939,9 @@ static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const f
 static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
     int i = 0;
     for (int p = 0; p < np; ++p) {
-        const int ne = ggml_nelements(ps[p]) ;
+        const int64_t ne = ggml_nelements(ps[p]) ;
         // TODO: add function to get all elements at once
-        for (int j = 0; j < ne; ++j) {
+        for (int64_t j = 0; j < ne; ++j) {
             x[i++] = ggml_get_f32_1d(ps[p], j);
         }
     }
@@ -9848,9 +9950,9 @@ static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float *
 static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
     int i = 0;
     for (int p = 0; p < np; ++p) {
-        const int ne = ggml_nelements(ps[p]) ;
+        const int64_t ne = ggml_nelements(ps[p]) ;
         // TODO: add function to get all elements at once
-        for (int j = 0; j < ne; ++j) {
+        for (int64_t j = 0; j < ne; ++j) {
             g[i++] = ggml_get_f32_1d(ps[p]->grad, j);
         }
     }
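The three optimizer helpers above copy every parameter tensor into one contiguous float vector and back, now with 64-bit element indices. A rough usage sketch, written as if it lived next to them inside ggml.c; the driver function and its learning-rate update are hypothetical, only the three helpers come from the diff, and nx must equal the summed ggml_nelements() of all np tensors.

// Hypothetical caller inside ggml.c (not existing ggml code).
static void opt_sgd_step_sketch(int np, struct ggml_tensor * const ps[],
                                float * x, float * g, int64_t nx, float lr) {
    ggml_opt_get_params(np, ps, x);    // tensors   -> flat x
    ggml_opt_get_grad  (np, ps, g);    // gradients -> flat g
    for (int64_t j = 0; j < nx; ++j) {
        x[j] -= lr*g[j];               // plain gradient-descent update
    }
    ggml_opt_set_params(np, ps, x);    // flat x    -> back into the tensors
}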