llama_cpp 0.12.3 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +22 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -2
- data/vendor/tmp/llama.cpp/Makefile +23 -4
- data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +706 -15
- data/vendor/tmp/llama.cpp/ggml-quants.h +17 -1
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15255 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +60854 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5270 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +34 -0
- data/vendor/tmp/llama.cpp/ggml.c +350 -57
- data/vendor/tmp/llama.cpp/ggml.h +7 -1
- data/vendor/tmp/llama.cpp/llama.cpp +574 -39
- data/vendor/tmp/llama.cpp/llama.h +11 -15
- metadata +9 -2
data/vendor/tmp/llama.cpp/ggml.c
CHANGED
@@ -218,6 +218,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
                 break;
         }
         GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
         return NULL;
     }
     return aligned_memory;
@@ -230,6 +231,38 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #endif
 #endif

+inline static void * ggml_malloc(size_t size) {
+    if (size == 0) {
+        GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
+        return NULL;
+    }
+    void * result = malloc(size);
+    if (result == NULL) {
+        GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
+    }
+    return result;
+}
+
+// calloc
+inline static void * ggml_calloc(size_t num, size_t size) {
+    if (num == 0 || size == 0) {
+        GGML_PRINT("WARNING: Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
+        return NULL;
+    }
+    void * result = calloc(num, size);
+    if (result == NULL) {
+        GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
+        GGML_ASSERT(false);
+    }
+    return result;
+}
+
+#define GGML_MALLOC(size)      ggml_malloc(size)
+#define GGML_CALLOC(num, size) ggml_calloc(num, size)
+
+#define GGML_FREE(ptr) free(ptr)
+
 #define UNUSED GGML_UNUSED
 #define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)

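Note: ggml_malloc and ggml_calloc centralize allocation-failure handling behind the new GGML_MALLOC/GGML_CALLOC/GGML_FREE macros used throughout the rest of this diff. A standalone sketch of the same guard pattern (checked_malloc is a hypothetical name, not a ggml symbol):

    #include <stdio.h>
    #include <stdlib.h>

    // same guard pattern as ggml_malloc: warn on zero-size requests and
    // abort loudly instead of returning a silent NULL on failure
    static void * checked_malloc(size_t size) {
        if (size == 0) {
            fprintf(stderr, "WARNING: allocating 0 bytes\n");
            return NULL;
        }
        void * p = malloc(size);
        if (p == NULL) {
            fprintf(stderr, "failed to allocate %6.2f MB\n", size/(1024.0*1024.0));
            abort();
        }
        return p;
    }

    int main(void) {
        double * v = checked_malloc(1024 * sizeof(double)); // dies here if OOM
        v[0] = 1.0;
        free(v);
        return 0;
    }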
@@ -248,6 +281,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
 #include "ggml-cuda.h"
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
+#elif defined(GGML_USE_VULKAN)
+#include "ggml-vulkan.h"
+#elif defined(GGML_USE_SYCL)
+#include "ggml-sycl.h"
 #endif

 // floating point type used to accumulate sums
@@ -595,6 +632,17 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot                  = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type             = GGML_TYPE_Q8_K,
     },
+    [GGML_TYPE_IQ3_XXS] = {
+        .type_name                = "iq3_xxs",
+        .blck_size                = QK_K,
+        .type_size                = sizeof(block_iq3_xxs),
+        .is_quantized             = true,
+        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
+        .from_float               = quantize_row_iq3_xxs,
+        .from_float_reference     = (ggml_from_float_t)quantize_row_iq3_xxs_reference,
+        .vec_dot                  = ggml_vec_dot_iq3_xxs_q8_K,
+        .vec_dot_type             = GGML_TYPE_Q8_K,
+    },
     [GGML_TYPE_Q8_K] = {
         .type_name                = "q8_K",
         .blck_size                = QK_K,
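Note: the type_traits table is how ggml registers a quantization format: a designated-initializer entry keyed by the enum value supplies the block size, the (de)quantization callbacks, and the vec_dot partner type (Q8_K for IQ3_XXS). A reduced sketch of the same dispatch idiom, with illustrative types rather than the real ggml ones:

    #include <stddef.h>
    #include <stdio.h>

    enum my_type { TYPE_F32, TYPE_Q8, TYPE_COUNT };

    typedef void (*to_float_t)(const void * src, float * dst, int n);

    struct type_traits {
        const char * type_name;
        int          blck_size;
        size_t       type_size;
        to_float_t   to_float;
    };

    static void dequant_q8(const void * src, float * dst, int n) {
        const signed char * q = src;
        for (int i = 0; i < n; ++i) dst[i] = (float) q[i]; // toy dequantization
    }

    // designated initializers keep the table in sync with the enum
    static const struct type_traits traits[TYPE_COUNT] = {
        [TYPE_F32] = { .type_name = "f32", .blck_size = 1,  .type_size = 4 },
        [TYPE_Q8]  = { .type_name = "q8",  .blck_size = 32, .type_size = 32,
                       .to_float = dequant_q8 },
    };

    int main(void) {
        printf("%s block=%d\n", traits[TYPE_Q8].type_name, traits[TYPE_Q8].blck_size);
        return 0;
    }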
@@ -2140,6 +2188,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
         case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;    break;
         case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS; break;
         case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;  break;
+        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS; break;
         case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT;   break;
         case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT;   break;
     }
@@ -2293,6 +2342,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         ggml_init_cublas();
 #elif defined(GGML_USE_CLBLAST)
         ggml_cl_init();
+#elif defined(GGML_USE_VULKAN)
+        ggml_vk_init();
+#elif defined(GGML_USE_SYCL)
+        ggml_init_sycl();
 #endif

         ggml_setup_op_has_task_pass();
@@ -5296,7 +5349,7 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
         int                   s0,
         int                   p0,
         int                   d0) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false); // [N, OL, IC * K]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]

     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
@@ -5374,16 +5427,15 @@ struct ggml_tensor * ggml_conv_depthwise_2d(
         int                  p1,
         int                  d0,
         int                  d1) {
+
     struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
     struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
                                         ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
-                                        s0, s1, p0, p1, d0, d1, true); // [N * IC, OH, OW, KH * KW]
-
-    struct ggml_tensor * result =
-        ggml_mul_mat(ctx,
-                ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1), // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
-                ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3])); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
+                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
+    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]

+    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
+    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
     result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]

     return result;
@@ -5404,7 +5456,8 @@ struct ggml_tensor * ggml_im2col(
         int                  p1,
         int                  d0,
         int                  d1,
-        bool                 is_2D) {
+        bool                 is_2D,
+        enum ggml_type       dst_type) {

     if(is_2D) {
         GGML_ASSERT(a->ne[2] == b->ne[2]);
@@ -5428,7 +5481,7 @@ struct ggml_tensor * ggml_im2col(
         is_2D ? b->ne[3] : 1,
     };

-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne);
+    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
     int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
     ggml_set_op_params(result, params, sizeof(params));

@@ -5453,7 +5506,7 @@ struct ggml_tensor * ggml_conv_2d(
         int                  p1,
         int                  d0,
         int                  d1) {
-    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true); // [N, OH, OW, IC * KH * KW]
+    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N, OH, OW, IC * KH * KW]

     struct ggml_tensor * result =
         ggml_mul_mat(ctx,
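Note: ggml_conv_1d and ggml_conv_2d lower convolution to im2col followed by ggml_mul_mat; the new dst_type argument decides whether the patch matrix is materialized as F16 or F32. A standalone sketch of the output-size arithmetic these builders depend on (same formula as ggml_calc_conv_output_size):

    #include <stdio.h>

    // output extent of one convolution axis: input ins, kernel ks,
    // stride s, padding p, dilation d
    static long conv_out_size(long ins, long ks, int s, int p, int d) {
        return (ins + 2*p - d*(ks - 1) - 1) / s + 1;
    }

    int main(void) {
        // 224x224 image, 3x3 kernel, stride 1, padding 1, dilation 1
        long oh = conv_out_size(224, 3, 1, 1, 1);
        long ow = conv_out_size(224, 3, 1, 1, 1);
        // im2col flattens each output position into a row of IC*KH*KW values,
        // so conv becomes one [OC, IC*KH*KW] x [IC*KH*KW, OH*OW] matmul
        printf("OH=%ld OW=%ld\n", oh, ow); // prints: OH=224 OW=224
        return 0;
    }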
@@ -5579,12 +5632,13 @@ struct ggml_tensor * ggml_pool_2d(
         is_node = true;
     }

+    struct ggml_tensor * result;
     const int64_t ne[3] = {
         ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
         ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
         a->ne[2],
     };
-    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
+    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);

     int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
     ggml_set_op_params(result, params, sizeof(params));
@@ -5592,7 +5646,6 @@ struct ggml_tensor * ggml_pool_2d(
     result->op = GGML_OP_POOL_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
-
     return result;
 }

@@ -7207,6 +7260,17 @@ static void ggml_compute_forward_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;

+#ifdef GGML_USE_CLBLAST
+    if (src1->backend == GGML_BACKEND_GPU) {
+        // TODO: OpenCL kernel support full broadcast
+        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+        if (ith == 0) {
+            ggml_cl_add(src0, src1, dst);
+        }
+        return;
+    }
+#endif
+
     const int nr  = ggml_nrows(src0);

     GGML_TENSOR_BINARY_OP_LOCALS
@@ -7487,7 +7551,12 @@ static void ggml_compute_forward_add(
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
-                ggml_compute_forward_add_f32(params, src0, src1, dst);
+                if (src1->type == GGML_TYPE_F32) {
+                    ggml_compute_forward_add_f32(params, src0, src1, dst);
+                }
+                else {
+                    GGML_ASSERT(false);
+                }
             } break;
         case GGML_TYPE_F16:
             {
@@ -7513,6 +7582,7 @@ static void ggml_compute_forward_add(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_add_q_f32(params, src0, src1, dst);
             } break;
@@ -7779,6 +7849,7 @@ static void ggml_compute_forward_add1(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_add1_q_f32(params, src0, src1, dst);
             } break;
@@ -7898,6 +7969,7 @@ static void ggml_compute_forward_acc(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         default:
             {
                 GGML_ASSERT(false);
@@ -7999,7 +8071,7 @@ static void ggml_compute_forward_mul_f32(
     const int ith = params->ith;
     const int nth = params->nth;

-#ifdef GGML_USE_CLBLAST
+#if defined(GGML_USE_CLBLAST)
     if (src1->backend == GGML_BACKEND_GPU) {
         // TODO: OpenCL kernel support full broadcast
         GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
@@ -9954,7 +10026,7 @@ static void ggml_compute_forward_mul_mat(
 #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
     if (ggml_compute_forward_mul_mat_use_blas(dst)) {
         const int64_t ne_plane      = ne01*ne00;
-        const int64_t desired_wsize = ne13*ne12*ne_plane*sizeof(float);
+        const size_t  desired_wsize = ne13*ne12*ne_plane*sizeof(float);
         UNUSED(desired_wsize);

         if (params->type == GGML_TASK_INIT) {
@@ -10649,6 +10721,7 @@ static void ggml_compute_forward_out_prod(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_out_prod_q_f32(params, src0, src1, dst);
             } break;
@@ -10828,6 +10901,7 @@ static void ggml_compute_forward_set(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         default:
             {
                 GGML_ASSERT(false);
@@ -11024,6 +11098,7 @@ static void ggml_compute_forward_get_rows(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
             {
                 ggml_compute_forward_get_rows_q(params, src0, src1, dst);
             } break;
@@ -11671,6 +11746,7 @@ static void ggml_compute_forward_alibi(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -11747,6 +11823,7 @@ static void ggml_compute_forward_clamp(
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
         case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_I8:
         case GGML_TYPE_I16:
@@ -12416,6 +12493,92 @@ static void ggml_compute_forward_conv_transpose_1d(
     }
 }

+// src0: kernel [OC, IC, KH, KW]
+// src1: image [N, IC, IH, IW]
+// dst:  result [N, OH, OW, IC*KH*KW]
+static void ggml_compute_forward_im2col_f32(
+        const struct ggml_compute_params * params,
+        const struct ggml_tensor * src0,
+        const struct ggml_tensor * src1,
+              struct ggml_tensor * dst) {
+    GGML_ASSERT(src0->type == GGML_TYPE_F16);
+    GGML_ASSERT(src1->type == GGML_TYPE_F32);
+    GGML_ASSERT( dst->type == GGML_TYPE_F32);
+
+    int64_t t0 = ggml_perf_time_us();
+    UNUSED(t0);
+
+    GGML_TENSOR_BINARY_OP_LOCALS;
+
+    const int32_t s0 = ((const int32_t *)(dst->op_params))[0];
+    const int32_t s1 = ((const int32_t *)(dst->op_params))[1];
+    const int32_t p0 = ((const int32_t *)(dst->op_params))[2];
+    const int32_t p1 = ((const int32_t *)(dst->op_params))[3];
+    const int32_t d0 = ((const int32_t *)(dst->op_params))[4];
+    const int32_t d1 = ((const int32_t *)(dst->op_params))[5];
+    const bool is_2D = ((const int32_t *)(dst->op_params))[6] == 1;
+
+    const int ith = params->ith;
+    const int nth = params->nth;
+
+    const int64_t N  = is_2D ? ne13 : ne12;
+    const int64_t IC = is_2D ? ne12 : ne11;
+    const int64_t IH = is_2D ? ne11 : 1;
+    const int64_t IW = ne10;
+
+    const int64_t KH = is_2D ? ne01 : 1;
+    const int64_t KW = ne00;
+
+    const int64_t OH = is_2D ? ne2 : 1;
+    const int64_t OW = ne1;
+
+    int ofs0 = is_2D ? nb13 : nb12;
+    int ofs1 = is_2D ? nb12 : nb11;
+
+    GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
+    GGML_ASSERT(nb10 == sizeof(float));
+
+    if (params->type == GGML_TASK_INIT) {
+        return;
+    }
+
+    if (params->type == GGML_TASK_FINALIZE) {
+        return;
+    }
+
+    // im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
+    {
+        float * const wdata = (float *) dst->data;
+
+        for (int64_t in = 0; in < N; in++) {
+            for (int64_t ioh = 0; ioh < OH; ioh++) { // 1
+                for (int64_t iow = 0; iow < OW; iow++) {
+                    for (int64_t iic = ith; iic < IC; iic += nth) {
+
+                        // micro kernel
+                        float * dst_data = wdata + (in*OH*OW + ioh*OW + iow)*(IC*KH*KW); // [IC, KH, KW]
+                        const float * const src_data = (float *)((char *) src1->data + in*ofs0 + iic*ofs1); // [IH, IW]
+
+                        for (int64_t ikh = 0; ikh < KH; ikh++) { // 1
+                            for (int64_t ikw = 0; ikw < KW; ikw++) {
+                                const int64_t iiw = iow*s0 + ikw*d0 - p0;
+                                const int64_t iih = ioh*s1 + ikh*d1 - p1;
+
+                                if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
+                                    dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0;
+                                } else {
+                                    dst_data[iic*(KH*KW) + ikh*KW + ikw] = (src_data[iih*IW + iiw]);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+
 // src0: kernel [OC, IC, KH, KW]
 // src1: image [N, IC, IH, IW]
 // dst:  result [N, OH, OW, IC*KH*KW]
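Note: the freshly added ggml_compute_forward_im2col_f32 mirrors the existing F16 kernel but writes float patches. Its core is the index mapping iiw = iow*s0 + ikw*d0 - p0 (likewise for the height axis), with out-of-range taps zero-padded. The same mapping for a single 1-D row, as a self-contained sketch:

    #include <stdio.h>

    int main(void) {
        const int IW = 5, KW = 3;         // input width, kernel width
        const int s0 = 1, p0 = 1, d0 = 1; // stride, padding, dilation
        const int OW = (IW + 2*p0 - d0*(KW - 1) - 1)/s0 + 1;

        const float src[5] = {1, 2, 3, 4, 5};

        for (int iow = 0; iow < OW; ++iow) {
            for (int ikw = 0; ikw < KW; ++ikw) {
                const int iiw = iow*s0 + ikw*d0 - p0;          // same formula as ggml
                const float v = (iiw < 0 || iiw >= IW) ? 0.0f  // zero padding
                                                       : src[iiw];
                printf("%4.1f ", v);
            }
            printf("\n"); // each printed row is one im2col patch
        }
        return 0;
    }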
@@ -12506,14 +12669,14 @@ static void ggml_compute_forward_im2col(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
               struct ggml_tensor * dst) {
-    switch (src0->type) {
+    switch (dst->type) {
         case GGML_TYPE_F16:
             {
                 ggml_compute_forward_im2col_f16(params, src0, src1, dst);
             } break;
         case GGML_TYPE_F32:
             {
-                GGML_ASSERT(false);
+                ggml_compute_forward_im2col_f32(params, src0, src1, dst);
             } break;
         default:
             {
@@ -12704,8 +12867,8 @@ static void ggml_compute_forward_pool_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src,
         struct ggml_tensor * dst) {
-    assert(src->type == GGML_TYPE_F32);
-    assert(params->ith == 0);
+    GGML_ASSERT(src->type == GGML_TYPE_F32);
+    GGML_ASSERT(params->ith == 0);

     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
@@ -14683,8 +14846,26 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
     }
     GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
     GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
+#elif defined(GGML_USE_VULKAN)
+    const bool skip_cpu = ggml_vk_compute_forward(params, tensor);
+#ifdef GGML_VULKAN_CHECK_RESULTS
+    if (skip_cpu) {
+        ggml_vk_check_results_1(params, tensor);
+    }
+#endif
+    if (skip_cpu) {
+        return;
+    }
+    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_CPU);
+    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_CPU);
 #endif // GGML_USE_CUBLAS

+#ifdef GGML_USE_SYCL
+    bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
+    if (skip_cpu) {
+        return;
+    }
+#endif // GGML_USE_SYCL
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
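Note: each GPU backend hooks ggml_compute_forward the same way: the backend is offered the node first, reports whether it executed it, and the CPU kernel runs only as a fallback. A minimal sketch of that dispatch shape (the accelerator_* names are placeholders, not ggml symbols):

    #include <stdbool.h>
    #include <stdio.h>

    struct node { const char * op; };

    // returns true when the accelerator executed the node itself
    static bool accelerator_compute_forward(struct node * n) {
        (void) n;
        return false; // pretend this op is not offloaded
    }

    static void cpu_compute_forward(struct node * n) {
        printf("cpu ran %s\n", n->op);
    }

    static void compute_forward(struct node * n) {
        const bool skip_cpu = accelerator_compute_forward(n);
        if (skip_cpu) {
            return; // same early-out as the Vulkan/SYCL hooks above
        }
        cpu_compute_forward(n);
    }

    int main(void) {
        struct node n = { "MUL_MAT" };
        compute_forward(&n);
        return 0;
    }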
@@ -15087,13 +15268,13 @@ struct ggml_hash_set ggml_hash_set_new(size_t size) {
     size = ggml_hash_size(size);
     struct ggml_hash_set result;
     result.size = size;
-    result.keys = malloc(sizeof(struct ggml_tensor *) * size);
+    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
     memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
     return result;
 }

 static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
-    free(hash_set.keys);
+    GGML_FREE(hash_set.keys);
 }

 struct hash_map {
@@ -15102,17 +15283,17 @@ struct hash_map {
 };

 static struct hash_map * ggml_new_hash_map(size_t size) {
-    struct hash_map * result = malloc(sizeof(struct hash_map));
+    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
     result->set = ggml_hash_set_new(size);
-    result->vals = malloc(sizeof(struct ggml_tensor *) * result->set.size);
+    result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
     memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
     return result;
 }

 static void ggml_hash_map_free(struct hash_map * map) {
     ggml_hash_set_free(map->set);
-    free(map->vals);
-    free(map);
+    GGML_FREE(map->vals);
+    GGML_FREE(map);
 }

 // gradient checkpointing
@@ -16597,7 +16778,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
             } break;
         case GGML_OP_SOFT_MAX:
             {
-                n_tasks = MIN(MIN(4, n_threads), ggml_nrows(node->src[0]));
+                n_tasks = MIN(n_threads, ggml_nrows(node->src[0]));
             } break;
         case GGML_OP_CONV_TRANSPOSE_1D:
             {
@@ -16890,12 +17071,16 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     struct ggml_cplan cplan;
     memset(&cplan, 0, sizeof(struct ggml_cplan));

+    int max_tasks = 1;
+
     // thread scheduling for the different operations + work buffer size estimation
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];

         const int n_tasks = ggml_get_n_tasks(node, n_threads);

+        max_tasks = MAX(max_tasks, n_tasks);
+
         size_t cur = 0;

         switch (node->op) {
@@ -17062,7 +17247,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
         work_size += CACHE_LINE_SIZE*(n_threads - 1);
     }

-    cplan.n_threads = n_threads;
+    cplan.n_threads = MIN(max_tasks, n_threads);
     cplan.work_size = work_size;
     cplan.work_data = NULL;

@@ -17079,6 +17264,17 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         }
     }

+#ifdef GGML_USE_VULKAN
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_vk_preallocate_buffers_graph(cgraph->nodes[i]);
+    }
+    ggml_vk_preallocate_buffers();
+
+    for (int i = 0; i < cgraph->n_nodes; i++) {
+        ggml_vk_build_graph(cgraph->nodes[i], i == cgraph->n_nodes - 1);
+    }
+#endif
+
     const int n_threads = cplan->n_threads;

     struct ggml_compute_state_shared state_shared = {
@@ -17130,6 +17326,10 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
         }
     }

+#ifdef GGML_USE_VULKAN
+    ggml_vk_graph_cleanup();
+#endif
+
     // performance stats (graph)
     {
         int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
@@ -18770,6 +18970,7 @@ void ggml_quantize_init(enum ggml_type type) {
     switch (type) {
         case GGML_TYPE_IQ2_XXS: iq2xs_init_impl(256); break;
        case GGML_TYPE_IQ2_XS:  iq2xs_init_impl(512); break;
+        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
         default: // nothing
             break;
     }
@@ -19032,6 +19233,15 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
                 result = quantize_iq2_xs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
                 GGML_ASSERT(result == row_size * nrows);
             } break;
+        case GGML_TYPE_IQ3_XXS:
+            {
+                GGML_ASSERT(start % QK_K == 0);
+                GGML_ASSERT(start % n_per_row == 0);
+                size_t start_row = start / n_per_row;
+                size_t row_size = ggml_row_size(type, n_per_row);
+                result = quantize_iq3_xxs(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+                GGML_ASSERT(result == row_size * nrows);
+            } break;
         case GGML_TYPE_F16:
             {
                 size_t elemsize = sizeof(ggml_fp16_t);
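Note: every quantized branch of ggml_quantize_chunk repeats the same bookkeeping: start must be row-aligned, start_row = start / n_per_row selects the first row of the chunk, and row_size converts rows to a byte offset into dst. The arithmetic in isolation, with example numbers:

    #include <assert.h>
    #include <stdio.h>
    #include <stddef.h>

    int main(void) {
        const size_t n_per_row = 4096;          // elements per tensor row
        const size_t row_size  = 1416;          // bytes per quantized row (example value)
        const size_t start     = 8 * n_per_row; // chunk begins 8 rows in

        assert(start % n_per_row == 0);         // chunks cover whole rows
        const size_t start_row  = start / n_per_row;
        const size_t dst_offset = start_row * row_size;

        printf("row %zu -> byte offset %zu\n", start_row, dst_offset); // row 8 -> byte offset 11328
        return 0;
    }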
@@ -19158,6 +19368,25 @@ struct gguf_context {
     void * data;
 };

+static size_t gguf_type_size(enum gguf_type type) {
+    GGML_ASSERT(0 <= type && type < GGUF_TYPE_COUNT);
+    return GGUF_TYPE_SIZE[type];
+}
+
+static void gguf_tensor_info_sanitize(struct gguf_tensor_info * info) {
+    GGML_ASSERT(info->n_dims <= GGML_MAX_DIMS);
+    GGML_ASSERT(0 <= info->type && info->type < GGML_TYPE_COUNT);
+
+    for (uint32_t i = 0; i < info->n_dims; ++i) {
+        GGML_ASSERT(info->ne[i] > 0);
+    }
+
+    // prevent overflow for total number of elements
+    GGML_ASSERT(INT64_MAX/info->ne[1] > info->ne[0]);
+    GGML_ASSERT(INT64_MAX/info->ne[2] > info->ne[0]*info->ne[1]);
+    GGML_ASSERT(INT64_MAX/info->ne[3] > info->ne[0]*info->ne[1]*info->ne[2]);
+}
+
 static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
     const size_t n = fread(dst, 1, size, file);
     *offset += n;
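Note: gguf_tensor_info_sanitize validates each dimension product before it is computed: asserting INT64_MAX/ne[1] > ne[0] guarantees ne[0]*ne[1] cannot overflow, and the checks chain across all four dimensions. The same division trick as a standalone helper:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    // true when a*b would exceed INT64_MAX (for positive a and b)
    static bool mul_would_overflow(int64_t a, int64_t b) {
        return b != 0 && a > INT64_MAX / b;
    }

    int main(void) {
        printf("%d\n", mul_would_overflow(1 << 20, 1 << 20));   // 0: product fits
        printf("%d\n", mul_would_overflow(INT64_MAX / 2, 3));   // 1: would overflow
        return 0;
    }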
@@ -19170,8 +19399,17 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {

     bool ok = true;

-    ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
-    p->data = calloc(p->n + 1, 1);
+    ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset);
+
+    // early exit if string length is invalid, prevents from integer overflow
+    if (p->n == SIZE_MAX) {
+        fprintf(stderr, "%s: invalid string length (%" PRIu64 ")\n", __func__, p->n);
+        return false;
+    }
+
+    p->data = GGML_CALLOC(p->n + 1, 1);
+
     ok = ok && gguf_fread_el(file, p->data, p->n, offset);

     return ok;
 }
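Note: gguf_fread_str now rejects a length of SIZE_MAX before computing p->n + 1, because that addition would wrap to zero and calloc would hand back a tiny buffer for a huge claimed length. A sketch of the hardened read pattern (read_string is a hypothetical helper, same idea):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    // reads a length-prefixed string; rejects lengths where len + 1 would wrap
    static char * read_string(FILE * f) {
        uint64_t len = 0;
        if (fread(&len, sizeof(len), 1, f) != 1) return NULL;
        if (len >= SIZE_MAX) return NULL;        // len + 1 below would overflow
        char * s = calloc((size_t) len + 1, 1);  // +1 keeps it NUL-terminated
        if (s == NULL) return NULL;
        if (fread(s, 1, (size_t) len, f) != (size_t) len) {
            free(s);
            return NULL;
        }
        return s;
    }

    int main(void) {
        FILE * f = tmpfile();
        const uint64_t n = 5;
        fwrite(&n, sizeof(n), 1, f);
        fwrite("hello", 1, 5, f);
        rewind(f);
        char * s = read_string(f);
        printf("%s\n", s ? s : "(failed)");
        free(s);
        fclose(f);
        return 0;
    }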
@@ -19243,6 +19481,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         return NULL;
     }

+    // sanity-checks to prevent from integer/buffer overflows
+
+    ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/sizeof(struct gguf_tensor_info));
+    ok = ok && (ctx->header.n_tensors < (SIZE_MAX/2)/ggml_tensor_overhead());
+    ok = ok && (ctx->header.n_kv      < (SIZE_MAX/2)/sizeof(struct gguf_kv));
+
     if (!ok) {
         fprintf(stderr, "%s: failed to read header\n", __func__);
         fclose(file);
@@ -19253,7 +19497,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the kv pairs
     {
-        ctx->kv = malloc(ctx->header.n_kv * sizeof(struct gguf_kv));
+        ctx->kv = GGML_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));

         for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
             struct gguf_kv * kv = &ctx->kv[i];
@@ -19281,7 +19525,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_ARRAY:
                     {
                         ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
-                        ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n),
+                        ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);

                         switch (kv->value.arr.type) {
                             case GGUF_TYPE_UINT8:
@@ -19296,21 +19540,39 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                             case GGUF_TYPE_FLOAT64:
                             case GGUF_TYPE_BOOL:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
-                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
+                                    // prevent from integer overflow in the malloc below
+                                    if (kv->value.arr.n >= SIZE_MAX/gguf_type_size(kv->value.arr.type)) {
+                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
+                                        fclose(file);
+                                        gguf_free(ctx);
+                                        return NULL;
+                                    }
+
+                                    kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * gguf_type_size(kv->value.arr.type));
+
+                                    ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type), &offset);
                                 } break;
                             case GGUF_TYPE_STRING:
                                 {
-                                    kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
+                                    // prevent from integer overflow in the malloc below
+                                    if (kv->value.arr.n >= SIZE_MAX/sizeof(struct gguf_str)) {
+                                        fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
+                                        fclose(file);
+                                        gguf_free(ctx);
+                                        return NULL;
+                                    }
+
+                                    kv->value.arr.data = GGML_MALLOC(kv->value.arr.n * sizeof(struct gguf_str));
+
                                     for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                                         ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
                                     }
                                 } break;
                             case GGUF_TYPE_ARRAY:
-                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+                            default: GGML_ASSERT(false && "invalid type"); break;
                         }
                     } break;
-                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+                default: GGML_ASSERT(false && "invalid type");
             }

             if (!ok) {
@@ -19328,7 +19590,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

     // read the tensor infos
     {
-        ctx->infos = malloc(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));
+        ctx->infos = GGML_MALLOC(ctx->header.n_tensors * sizeof(struct gguf_tensor_info));

         for (uint64_t i = 0; i < ctx->header.n_tensors; ++i) {
             struct gguf_tensor_info * info = &ctx->infos[i];
@@ -19339,12 +19601,18 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p

             ok = ok && gguf_fread_str(file, &info->name, &offset);
             ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
+
+            ok = ok && (info->n_dims <= GGML_MAX_DIMS);
+
             for (uint32_t j = 0; j < info->n_dims; ++j) {
                 ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
             }
+
             ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
             ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);

+            gguf_tensor_info_sanitize(info);
+
             if (!ok) {
                 fprintf(stderr, "%s: failed to read tensor info\n", __func__);
                 fclose(file);
@@ -19498,12 +19766,12 @@ void gguf_free(struct gguf_context * ctx) {
             struct gguf_kv * kv = &ctx->kv[i];

             if (kv->key.data) {
-                free(kv->key.data);
+                GGML_FREE(kv->key.data);
             }

             if (kv->type == GGUF_TYPE_STRING) {
                 if (kv->value.str.data) {
-                    free(kv->value.str.data);
+                    GGML_FREE(kv->value.str.data);
                 }
             }

@@ -19513,16 +19781,16 @@ void gguf_free(struct gguf_context * ctx) {
                         for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
                             struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
                             if (str->data) {
-                                free(str->data);
+                                GGML_FREE(str->data);
                             }
                         }
                     }
-                    free(kv->value.arr.data);
+                    GGML_FREE(kv->value.arr.data);
                 }
             }
         }

-        free(ctx->kv);
+        GGML_FREE(ctx->kv);
     }
@@ -19530,11 +19798,11 @@ void gguf_free(struct gguf_context * ctx) {
             struct gguf_tensor_info * info = &ctx->infos[i];

             if (info->name.data) {
-                free(info->name.data);
+                GGML_FREE(info->name.data);
             }
         }

-        free(ctx->infos);
+        GGML_FREE(ctx->infos);
     }

     GGML_ALIGNED_FREE(ctx);
@@ -19835,8 +20103,8 @@ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_ty
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = type;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
-    memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
+    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*gguf_type_size(type));
+    memcpy(ctx->kv[idx].value.arr.data, data, n*gguf_type_size(type));
 }

 void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
@@ -19845,7 +20113,7 @@ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char **
     ctx->kv[idx].type           = GGUF_TYPE_ARRAY;
     ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
     ctx->kv[idx].value.arr.n    = n;
-    ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
+    ctx->kv[idx].value.arr.data = GGML_MALLOC(n*sizeof(struct gguf_str));
     for (int i = 0; i < n; i++) {
         struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
         str->n    = strlen(data[i]);
@@ -19872,19 +20140,19 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
             case GGUF_TYPE_ARRAY:
                 {
                     if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
-                        const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
+                        const char ** data = GGML_MALLOC(src->kv[i].value.arr.n*sizeof(char *));
                         for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
                             data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
                         }
                         gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-                        free(data);
+                        GGML_FREE((void *)data);
                     } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
                         GGML_ASSERT(false && "nested arrays not supported");
                     } else {
                         gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
                     }
                 } break;
-            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+            default: GGML_ASSERT(false && "invalid type"); break;
         }
     }
 }
@@ -19960,7 +20228,7 @@ struct gguf_buf {

 static struct gguf_buf gguf_buf_init(size_t size) {
     struct gguf_buf buf = {
-        /*buf.data =*/ size == 0 ? NULL : malloc(size),
+        /*buf.data =*/ size == 0 ? NULL : GGML_MALLOC(size),
         /*buf.size =*/ size,
         /*buf.offset =*/ 0,
     };
@@ -19970,7 +20238,7 @@ static struct gguf_buf gguf_buf_init(size_t size) {

 static void gguf_buf_free(struct gguf_buf buf) {
     if (buf.data) {
-        free(buf.data);
+        GGML_FREE(buf.data);
     }
 }

@@ -20051,7 +20319,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                         case GGUF_TYPE_FLOAT64:
                         case GGUF_TYPE_BOOL:
                             {
-                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
+                                gguf_bwrite_el(buf, kv->value.arr.data, kv->value.arr.n * gguf_type_size(kv->value.arr.type));
                             } break;
                         case GGUF_TYPE_STRING:
                             {
@@ -20060,10 +20328,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
                             }
                         } break;
                     case GGUF_TYPE_ARRAY:
-                    case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
+                    default: GGML_ASSERT(false && "invalid type"); break;
                 }
             } break;
-        case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+        default: GGML_ASSERT(false && "invalid type");
     }
 }

@@ -20264,7 +20532,7 @@ int ggml_cpu_has_wasm_simd(void) {
 }

 int ggml_cpu_has_blas(void) {
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS) || defined(GGML_USE_CUBLAS) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_SYCL)
     return 1;
 #else
     return 0;
@@ -20287,8 +20555,33 @@ int ggml_cpu_has_clblast(void) {
 #endif
 }

+int ggml_cpu_has_vulkan(void) {
+#if defined(GGML_USE_VULKAN)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int ggml_cpu_has_kompute(void) {
+#if defined(GGML_USE_KOMPUTE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int ggml_cpu_has_sycl(void) {
+#if defined(GGML_USE_SYCL)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
 int ggml_cpu_has_gpublas(void) {
-    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast();
+    return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
+           ggml_cpu_has_sycl();
 }

 int ggml_cpu_has_sse3(void) {