cui-llama.rn 1.3.3 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/android/src/main/CMakeLists.txt +5 -7
- package/android/src/main/java/com/rnllama/LlamaContext.java +4 -4
- package/android/src/main/jni.cpp +9 -9
- package/cpp/common.cpp +28 -44
- package/cpp/common.h +35 -14
- package/cpp/ggml-alloc.c +0 -1
- package/cpp/ggml-backend-impl.h +38 -20
- package/cpp/ggml-backend-reg.cpp +246 -92
- package/cpp/ggml-backend.h +1 -0
- package/cpp/ggml-common.h +42 -48
- package/cpp/{ggml-cpu-aarch64.c → ggml-cpu-aarch64.cpp} +642 -223
- package/cpp/ggml-cpu-aarch64.h +2 -26
- package/cpp/ggml-cpu-traits.cpp +36 -0
- package/cpp/ggml-cpu-traits.h +38 -0
- package/cpp/ggml-cpu.c +14122 -13971
- package/cpp/ggml-cpu.cpp +627 -715
- package/cpp/ggml-cpu.h +0 -17
- package/cpp/ggml-impl.h +22 -6
- package/cpp/ggml-metal.m +482 -24
- package/cpp/ggml-quants.c +0 -9
- package/cpp/ggml-threading.h +4 -2
- package/cpp/ggml.c +284 -178
- package/cpp/ggml.h +73 -25
- package/cpp/llama-grammar.cpp +15 -15
- package/cpp/llama-grammar.h +2 -5
- package/cpp/llama-sampling.cpp +35 -90
- package/cpp/llama-vocab.cpp +7 -2
- package/cpp/llama-vocab.h +1 -1
- package/cpp/llama.cpp +1782 -586
- package/cpp/llama.h +20 -19
- package/cpp/sampling.cpp +11 -16
- package/cpp/sgemm.cpp +265 -258
- package/cpp/sgemm.h +2 -2
- package/cpp/speculative.cpp +4 -0
- package/cpp/unicode.cpp +51 -51
- package/cpp/unicode.h +9 -10
- package/lib/commonjs/index.js +38 -1
- package/lib/commonjs/index.js.map +1 -1
- package/lib/module/index.js +36 -0
- package/lib/module/index.js.map +1 -1
- package/lib/typescript/NativeRNLlama.d.ts +2 -3
- package/lib/typescript/NativeRNLlama.d.ts.map +1 -1
- package/lib/typescript/index.d.ts +36 -2
- package/lib/typescript/index.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/NativeRNLlama.ts +3 -3
- package/src/index.ts +46 -2
- package/cpp/amx/amx.cpp +0 -196
- package/cpp/amx/amx.h +0 -20
- package/cpp/amx/common.h +0 -101
- package/cpp/amx/mmq.cpp +0 -2524
- package/cpp/amx/mmq.h +0 -16
- package/cpp/ggml-aarch64.c +0 -129
- package/cpp/ggml-aarch64.h +0 -19
package/cpp/ggml.c
CHANGED
@@ -8,7 +8,10 @@
|
|
8
8
|
|
9
9
|
// FIXME: required here for quantization functions
|
10
10
|
#include "ggml-quants.h"
|
11
|
-
|
11
|
+
|
12
|
+
#ifdef LM_GGML_USE_CPU_HBM
|
13
|
+
#include <hbwmalloc.h>
|
14
|
+
#endif
|
12
15
|
|
13
16
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
14
17
|
#include <malloc.h> // using malloc.h with MSC/MINGW
|
@@ -801,32 +804,23 @@ static const struct lm_ggml_type_traits type_traits[LM_GGML_TYPE_COUNT] = {
|
|
801
804
|
.to_float = (lm_ggml_to_float_t) lm_ggml_bf16_to_fp32_row,
|
802
805
|
.from_float_ref = (lm_ggml_from_float_t) lm_ggml_fp32_to_bf16_row_ref,
|
803
806
|
},
|
804
|
-
[
|
805
|
-
.type_name = "
|
806
|
-
.blck_size =
|
807
|
-
.
|
808
|
-
.
|
809
|
-
.is_quantized = true,
|
810
|
-
.to_float = NULL,
|
811
|
-
.from_float_ref = NULL,
|
807
|
+
[31] = { // LM_GGML_TYPE_Q4_0_4_4
|
808
|
+
.type_name = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
|
809
|
+
.blck_size = 0,
|
810
|
+
.type_size = 0,
|
811
|
+
.is_quantized = false,
|
812
812
|
},
|
813
|
-
[
|
814
|
-
.type_name = "
|
815
|
-
.blck_size =
|
816
|
-
.
|
817
|
-
.
|
818
|
-
.is_quantized = true,
|
819
|
-
.to_float = NULL,
|
820
|
-
.from_float_ref = NULL,
|
813
|
+
[32] = { // LM_GGML_TYPE_Q4_0_4_8
|
814
|
+
.type_name = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
|
815
|
+
.blck_size = 0,
|
816
|
+
.type_size = 0,
|
817
|
+
.is_quantized = false,
|
821
818
|
},
|
822
|
-
[
|
823
|
-
.type_name = "
|
824
|
-
.blck_size =
|
825
|
-
.
|
826
|
-
.
|
827
|
-
.is_quantized = true,
|
828
|
-
.to_float = NULL,
|
829
|
-
.from_float_ref = NULL,
|
819
|
+
[33] = { // LM_GGML_TYPE_Q4_0_8_8
|
820
|
+
.type_name = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
|
821
|
+
.blck_size = 0,
|
822
|
+
.type_size = 0,
|
823
|
+
.is_quantized = false,
|
830
824
|
},
|
831
825
|
[LM_GGML_TYPE_TQ1_0] = {
|
832
826
|
.type_name = "tq1_0",
|
@@ -844,14 +838,23 @@ static const struct lm_ggml_type_traits type_traits[LM_GGML_TYPE_COUNT] = {
|
|
844
838
|
.to_float = (lm_ggml_to_float_t) dequantize_row_tq2_0,
|
845
839
|
.from_float_ref = (lm_ggml_from_float_t) quantize_row_tq2_0_ref,
|
846
840
|
},
|
847
|
-
[
|
848
|
-
.type_name = "
|
849
|
-
.blck_size =
|
850
|
-
.
|
851
|
-
.
|
852
|
-
|
853
|
-
|
854
|
-
.
|
841
|
+
[36] = { // LM_GGML_TYPE_IQ4_NL_4_4
|
842
|
+
.type_name = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
|
843
|
+
.blck_size = 0,
|
844
|
+
.type_size = 0,
|
845
|
+
.is_quantized = false,
|
846
|
+
},
|
847
|
+
[37] = { // LM_GGML_TYPE_IQ4_NL_4_8
|
848
|
+
.type_name = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
|
849
|
+
.blck_size = 0,
|
850
|
+
.type_size = 0,
|
851
|
+
.is_quantized = false,
|
852
|
+
},
|
853
|
+
[38] = { // LM_GGML_TYPE_IQ4_NL_8_8
|
854
|
+
.type_name = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
|
855
|
+
.blck_size = 0,
|
856
|
+
.type_size = 0,
|
857
|
+
.is_quantized = false,
|
855
858
|
},
|
856
859
|
};
|
857
860
|
|
@@ -963,6 +966,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
|
|
963
966
|
"POOL_2D_BACK",
|
964
967
|
"UPSCALE",
|
965
968
|
"PAD",
|
969
|
+
"PAD_REFLECT_1D",
|
966
970
|
"ARANGE",
|
967
971
|
"TIMESTEP_EMBEDDING",
|
968
972
|
"ARGSORT",
|
@@ -996,7 +1000,7 @@ static const char * LM_GGML_OP_NAME[LM_GGML_OP_COUNT] = {
|
|
996
1000
|
"OPT_STEP_ADAMW",
|
997
1001
|
};
|
998
1002
|
|
999
|
-
static_assert(LM_GGML_OP_COUNT ==
|
1003
|
+
static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
|
1000
1004
|
|
1001
1005
|
static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
|
1002
1006
|
"none",
|
@@ -1058,6 +1062,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
|
|
1058
1062
|
"pool_2d_back(x)",
|
1059
1063
|
"upscale(x)",
|
1060
1064
|
"pad(x)",
|
1065
|
+
"pad_reflect_1d(x)",
|
1061
1066
|
"arange(start, stop, step)",
|
1062
1067
|
"timestep_embedding(timesteps, dim, max_period)",
|
1063
1068
|
"argsort(x)",
|
@@ -1091,7 +1096,7 @@ static const char * LM_GGML_OP_SYMBOL[LM_GGML_OP_COUNT] = {
|
|
1091
1096
|
"adamw(x)",
|
1092
1097
|
};
|
1093
1098
|
|
1094
|
-
static_assert(LM_GGML_OP_COUNT ==
|
1099
|
+
static_assert(LM_GGML_OP_COUNT == 82, "LM_GGML_OP_COUNT != 82");
|
1095
1100
|
|
1096
1101
|
static_assert(LM_GGML_OP_POOL_COUNT == 2, "LM_GGML_OP_POOL_COUNT != 2");
|
1097
1102
|
|
@@ -1281,9 +1286,6 @@ enum lm_ggml_type lm_ggml_ftype_to_lm_ggml_type(enum lm_ggml_ftype ftype) {
|
|
1281
1286
|
case LM_GGML_FTYPE_MOSTLY_IQ4_XS: wtype = LM_GGML_TYPE_IQ4_XS; break;
|
1282
1287
|
case LM_GGML_FTYPE_MOSTLY_IQ3_S: wtype = LM_GGML_TYPE_IQ3_S; break;
|
1283
1288
|
case LM_GGML_FTYPE_MOSTLY_IQ2_S: wtype = LM_GGML_TYPE_IQ2_S; break;
|
1284
|
-
case LM_GGML_FTYPE_MOSTLY_Q4_0_4_4: wtype = LM_GGML_TYPE_Q4_0_4_4; break;
|
1285
|
-
case LM_GGML_FTYPE_MOSTLY_Q4_0_4_8: wtype = LM_GGML_TYPE_Q4_0_4_8; break;
|
1286
|
-
case LM_GGML_FTYPE_MOSTLY_Q4_0_8_8: wtype = LM_GGML_TYPE_Q4_0_8_8; break;
|
1287
1289
|
case LM_GGML_FTYPE_UNKNOWN: wtype = LM_GGML_TYPE_COUNT; break;
|
1288
1290
|
case LM_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = LM_GGML_TYPE_COUNT; break;
|
1289
1291
|
}
|
@@ -3528,15 +3530,18 @@ static struct lm_ggml_tensor * lm_ggml_rope_impl(
|
|
3528
3530
|
LM_GGML_ASSERT(c->ne[0] >= n_dims / 2);
|
3529
3531
|
}
|
3530
3532
|
|
3533
|
+
int sections[4] = {0, 0, 0, 0};
|
3534
|
+
|
3531
3535
|
struct lm_ggml_tensor * result = inplace ? lm_ggml_view_tensor(ctx, a) : lm_ggml_dup_tensor(ctx, a);
|
3532
3536
|
|
3533
|
-
int32_t params[
|
3537
|
+
int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
|
3534
3538
|
memcpy(params + 5, &freq_base, sizeof(float));
|
3535
3539
|
memcpy(params + 6, &freq_scale, sizeof(float));
|
3536
3540
|
memcpy(params + 7, &ext_factor, sizeof(float));
|
3537
3541
|
memcpy(params + 8, &attn_factor, sizeof(float));
|
3538
3542
|
memcpy(params + 9, &beta_fast, sizeof(float));
|
3539
3543
|
memcpy(params + 10, &beta_slow, sizeof(float));
|
3544
|
+
memcpy(params + 11, &sections, sizeof(int)*4);
|
3540
3545
|
lm_ggml_set_op_params(result, params, sizeof(params));
|
3541
3546
|
|
3542
3547
|
result->op = LM_GGML_OP_ROPE;
|
@@ -3558,6 +3563,53 @@ struct lm_ggml_tensor * lm_ggml_rope(
|
|
3558
3563
|
);
|
3559
3564
|
}
|
3560
3565
|
|
3566
|
+
struct lm_ggml_tensor * lm_ggml_rope_multi(
|
3567
|
+
struct lm_ggml_context * ctx,
|
3568
|
+
struct lm_ggml_tensor * a,
|
3569
|
+
struct lm_ggml_tensor * b,
|
3570
|
+
struct lm_ggml_tensor * c,
|
3571
|
+
int n_dims,
|
3572
|
+
int sections[4],
|
3573
|
+
int mode,
|
3574
|
+
int n_ctx_orig,
|
3575
|
+
float freq_base,
|
3576
|
+
float freq_scale,
|
3577
|
+
float ext_factor,
|
3578
|
+
float attn_factor,
|
3579
|
+
float beta_fast,
|
3580
|
+
float beta_slow) {
|
3581
|
+
// Multimodal Rotary Position Embedding
|
3582
|
+
LM_GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
|
3583
|
+
|
3584
|
+
LM_GGML_ASSERT(lm_ggml_is_vector(b));
|
3585
|
+
LM_GGML_ASSERT(b->type == LM_GGML_TYPE_I32);
|
3586
|
+
LM_GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
|
3587
|
+
|
3588
|
+
if (c) {
|
3589
|
+
LM_GGML_ASSERT(c->type == LM_GGML_TYPE_F32);
|
3590
|
+
LM_GGML_ASSERT(c->ne[0] >= n_dims / 2);
|
3591
|
+
}
|
3592
|
+
|
3593
|
+
struct lm_ggml_tensor * result = lm_ggml_dup_tensor(ctx, a);
|
3594
|
+
|
3595
|
+
int32_t params[11 + 4] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
|
3596
|
+
memcpy(params + 5, &freq_base, sizeof(float));
|
3597
|
+
memcpy(params + 6, &freq_scale, sizeof(float));
|
3598
|
+
memcpy(params + 7, &ext_factor, sizeof(float));
|
3599
|
+
memcpy(params + 8, &attn_factor, sizeof(float));
|
3600
|
+
memcpy(params + 9, &beta_fast, sizeof(float));
|
3601
|
+
memcpy(params + 10, &beta_slow, sizeof(float));
|
3602
|
+
memcpy(&params[11], sections, sizeof(int)*4);
|
3603
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
3604
|
+
|
3605
|
+
result->op = LM_GGML_OP_ROPE;
|
3606
|
+
result->src[0] = a;
|
3607
|
+
result->src[1] = b;
|
3608
|
+
result->src[2] = c;
|
3609
|
+
|
3610
|
+
return result;
|
3611
|
+
}
|
3612
|
+
|
3561
3613
|
struct lm_ggml_tensor * lm_ggml_rope_inplace(
|
3562
3614
|
struct lm_ggml_context * ctx,
|
3563
3615
|
struct lm_ggml_tensor * a,
|
@@ -3721,13 +3773,84 @@ struct lm_ggml_tensor * lm_ggml_clamp(
|
|
3721
3773
|
return result;
|
3722
3774
|
}
|
3723
3775
|
|
3724
|
-
// lm_ggml_conv_1d
|
3725
|
-
|
3726
3776
|
static int64_t lm_ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
|
3727
3777
|
return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
|
3728
3778
|
}
|
3729
3779
|
|
3730
|
-
|
3780
|
+
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
3781
|
+
// a: [OC,IC, KH, KW]
|
3782
|
+
// b: [N, IC, IH, IW]
|
3783
|
+
// result: [N, OH, OW, IC*KH*KW]
|
3784
|
+
struct lm_ggml_tensor * lm_ggml_im2col(
|
3785
|
+
struct lm_ggml_context * ctx,
|
3786
|
+
struct lm_ggml_tensor * a,
|
3787
|
+
struct lm_ggml_tensor * b,
|
3788
|
+
int s0,
|
3789
|
+
int s1,
|
3790
|
+
int p0,
|
3791
|
+
int p1,
|
3792
|
+
int d0,
|
3793
|
+
int d1,
|
3794
|
+
bool is_2D,
|
3795
|
+
enum lm_ggml_type dst_type) {
|
3796
|
+
if (is_2D) {
|
3797
|
+
LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
|
3798
|
+
} else {
|
3799
|
+
//LM_GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
|
3800
|
+
LM_GGML_ASSERT(b->ne[1] == a->ne[1]);
|
3801
|
+
LM_GGML_ASSERT(b->ne[3] == 1);
|
3802
|
+
}
|
3803
|
+
|
3804
|
+
const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
3805
|
+
const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
3806
|
+
|
3807
|
+
LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
|
3808
|
+
LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
|
3809
|
+
|
3810
|
+
const int64_t ne[4] = {
|
3811
|
+
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
|
3812
|
+
OW,
|
3813
|
+
is_2D ? OH : b->ne[2],
|
3814
|
+
is_2D ? b->ne[3] : 1,
|
3815
|
+
};
|
3816
|
+
|
3817
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
|
3818
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3819
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
3820
|
+
|
3821
|
+
result->op = LM_GGML_OP_IM2COL;
|
3822
|
+
result->src[0] = a;
|
3823
|
+
result->src[1] = b;
|
3824
|
+
|
3825
|
+
return result;
|
3826
|
+
}
|
3827
|
+
|
3828
|
+
struct lm_ggml_tensor * lm_ggml_im2col_back(
|
3829
|
+
struct lm_ggml_context * ctx,
|
3830
|
+
struct lm_ggml_tensor * a,
|
3831
|
+
struct lm_ggml_tensor * b,
|
3832
|
+
int64_t * ne,
|
3833
|
+
int s0,
|
3834
|
+
int s1,
|
3835
|
+
int p0,
|
3836
|
+
int p1,
|
3837
|
+
int d0,
|
3838
|
+
int d1,
|
3839
|
+
bool is_2D) {
|
3840
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3841
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3842
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
3843
|
+
|
3844
|
+
result->op = LM_GGML_OP_IM2COL_BACK;
|
3845
|
+
result->src[0] = a;
|
3846
|
+
result->src[1] = b;
|
3847
|
+
|
3848
|
+
return result;
|
3849
|
+
}
|
3850
|
+
|
3851
|
+
// lm_ggml_conv_1d
|
3852
|
+
|
3853
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d(
|
3731
3854
|
struct lm_ggml_context * ctx,
|
3732
3855
|
struct lm_ggml_tensor * a,
|
3733
3856
|
struct lm_ggml_tensor * b,
|
@@ -3757,137 +3880,75 @@ struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
|
|
3757
3880
|
return lm_ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
|
3758
3881
|
}
|
3759
3882
|
|
3760
|
-
//
|
3761
|
-
|
3762
|
-
static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
|
3763
|
-
return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
|
3764
|
-
}
|
3883
|
+
// lm_ggml_conv_1d_dw
|
3765
3884
|
|
3766
|
-
|
3885
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
|
3767
3886
|
struct lm_ggml_context * ctx,
|
3768
3887
|
struct lm_ggml_tensor * a,
|
3769
3888
|
struct lm_ggml_tensor * b,
|
3770
3889
|
int s0,
|
3771
3890
|
int p0,
|
3772
3891
|
int d0) {
|
3773
|
-
|
3774
|
-
|
3775
|
-
LM_GGML_ASSERT(a->ne[3] == 1);
|
3892
|
+
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
|
3893
|
+
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
|
3776
3894
|
|
3777
|
-
|
3778
|
-
LM_GGML_ASSERT(d0 == 1);
|
3779
|
-
|
3780
|
-
const int64_t ne[4] = {
|
3781
|
-
lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
|
3782
|
-
a->ne[1], b->ne[2], 1,
|
3783
|
-
};
|
3784
|
-
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3895
|
+
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, LM_GGML_TYPE_F16);
|
3785
3896
|
|
3786
|
-
|
3787
|
-
lm_ggml_set_op_params(result, params, sizeof(params));
|
3897
|
+
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, im2col, a);
|
3788
3898
|
|
3789
|
-
result
|
3790
|
-
result->src[0] = a;
|
3791
|
-
result->src[1] = b;
|
3899
|
+
result = lm_ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
|
3792
3900
|
|
3793
3901
|
return result;
|
3794
3902
|
}
|
3795
3903
|
|
3796
|
-
//
|
3904
|
+
// lm_ggml_conv_1d_dw_ph
|
3797
3905
|
|
3798
|
-
struct lm_ggml_tensor *
|
3906
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
|
3799
3907
|
struct lm_ggml_context * ctx,
|
3800
3908
|
struct lm_ggml_tensor * a,
|
3801
3909
|
struct lm_ggml_tensor * b,
|
3802
3910
|
int s0,
|
3803
|
-
int
|
3804
|
-
|
3805
|
-
|
3806
|
-
int d0,
|
3807
|
-
int d1) {
|
3808
|
-
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
3809
|
-
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
|
3810
|
-
lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
3811
|
-
s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
3812
|
-
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
3911
|
+
int d0) {
|
3912
|
+
return lm_ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
|
3913
|
+
}
|
3813
3914
|
|
3814
|
-
|
3815
|
-
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
|
3816
|
-
result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
3915
|
+
// lm_ggml_conv_transpose_1d
|
3817
3916
|
|
3818
|
-
|
3917
|
+
static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
|
3918
|
+
return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
|
3819
3919
|
}
|
3820
|
-
// lm_ggml_conv_2d
|
3821
3920
|
|
3822
|
-
|
3823
|
-
// a: [OC,IC, KH, KW]
|
3824
|
-
// b: [N, IC, IH, IW]
|
3825
|
-
// result: [N, OH, OW, IC*KH*KW]
|
3826
|
-
struct lm_ggml_tensor * lm_ggml_im2col(
|
3921
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
|
3827
3922
|
struct lm_ggml_context * ctx,
|
3828
3923
|
struct lm_ggml_tensor * a,
|
3829
3924
|
struct lm_ggml_tensor * b,
|
3830
3925
|
int s0,
|
3831
|
-
int s1,
|
3832
3926
|
int p0,
|
3833
|
-
int
|
3834
|
-
|
3835
|
-
|
3836
|
-
|
3837
|
-
enum lm_ggml_type dst_type) {
|
3838
|
-
if(is_2D) {
|
3839
|
-
LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
|
3840
|
-
} else {
|
3841
|
-
LM_GGML_ASSERT(a->ne[1] == b->ne[1]);
|
3842
|
-
LM_GGML_ASSERT(b->ne[3] == 1);
|
3843
|
-
}
|
3844
|
-
|
3845
|
-
const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
3846
|
-
const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
3927
|
+
int d0) {
|
3928
|
+
LM_GGML_ASSERT(lm_ggml_is_matrix(b));
|
3929
|
+
LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
|
3930
|
+
LM_GGML_ASSERT(a->ne[3] == 1);
|
3847
3931
|
|
3848
|
-
LM_GGML_ASSERT(
|
3849
|
-
LM_GGML_ASSERT(
|
3932
|
+
LM_GGML_ASSERT(p0 == 0);
|
3933
|
+
LM_GGML_ASSERT(d0 == 1);
|
3850
3934
|
|
3851
3935
|
const int64_t ne[4] = {
|
3852
|
-
|
3853
|
-
|
3854
|
-
is_2D ? OH : b->ne[2],
|
3855
|
-
is_2D ? b->ne[3] : 1,
|
3936
|
+
lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
|
3937
|
+
a->ne[1], b->ne[2], 1,
|
3856
3938
|
};
|
3939
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3857
3940
|
|
3858
|
-
|
3859
|
-
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3941
|
+
int32_t params[] = { s0, p0, d0 };
|
3860
3942
|
lm_ggml_set_op_params(result, params, sizeof(params));
|
3861
3943
|
|
3862
|
-
result->op =
|
3944
|
+
result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
|
3863
3945
|
result->src[0] = a;
|
3864
3946
|
result->src[1] = b;
|
3865
3947
|
|
3866
3948
|
return result;
|
3867
3949
|
}
|
3868
3950
|
|
3869
|
-
|
3870
|
-
struct lm_ggml_context * ctx,
|
3871
|
-
struct lm_ggml_tensor * a,
|
3872
|
-
struct lm_ggml_tensor * b,
|
3873
|
-
int64_t * ne,
|
3874
|
-
int s0,
|
3875
|
-
int s1,
|
3876
|
-
int p0,
|
3877
|
-
int p1,
|
3878
|
-
int d0,
|
3879
|
-
int d1,
|
3880
|
-
bool is_2D) {
|
3881
|
-
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3882
|
-
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3883
|
-
lm_ggml_set_op_params(result, params, sizeof(params));
|
3884
|
-
|
3885
|
-
result->op = LM_GGML_OP_IM2COL_BACK;
|
3886
|
-
result->src[0] = a;
|
3887
|
-
result->src[1] = b;
|
3888
|
-
|
3889
|
-
return result;
|
3890
|
-
}
|
3951
|
+
// lm_ggml_conv_2d
|
3891
3952
|
|
3892
3953
|
// a: [OC,IC, KH, KW]
|
3893
3954
|
// b: [N, IC, IH, IW]
|
@@ -3934,6 +3995,31 @@ struct lm_ggml_tensor * lm_ggml_conv_2d_s1_ph(
|
|
3934
3995
|
return lm_ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
|
3935
3996
|
}
|
3936
3997
|
|
3998
|
+
// lm_ggml_conv_2d_dw
|
3999
|
+
|
4000
|
+
struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
|
4001
|
+
struct lm_ggml_context * ctx,
|
4002
|
+
struct lm_ggml_tensor * a,
|
4003
|
+
struct lm_ggml_tensor * b,
|
4004
|
+
int s0,
|
4005
|
+
int s1,
|
4006
|
+
int p0,
|
4007
|
+
int p1,
|
4008
|
+
int d0,
|
4009
|
+
int d1) {
|
4010
|
+
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
4011
|
+
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
|
4012
|
+
lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
4013
|
+
s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
4014
|
+
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
4015
|
+
|
4016
|
+
new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
4017
|
+
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
|
4018
|
+
result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
4019
|
+
|
4020
|
+
return result;
|
4021
|
+
}
|
4022
|
+
|
3937
4023
|
// lm_ggml_conv_transpose_2d_p0
|
3938
4024
|
|
3939
4025
|
static int64_t lm_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
|
@@ -4110,6 +4196,37 @@ struct lm_ggml_tensor * lm_ggml_pad(
|
|
4110
4196
|
return result;
|
4111
4197
|
}
|
4112
4198
|
|
4199
|
+
// lm_ggml_pad_reflect_1d
|
4200
|
+
|
4201
|
+
struct lm_ggml_tensor * lm_ggml_pad_reflect_1d(
|
4202
|
+
struct lm_ggml_context * ctx,
|
4203
|
+
struct lm_ggml_tensor * a,
|
4204
|
+
int p0,
|
4205
|
+
int p1) {
|
4206
|
+
LM_GGML_ASSERT(p0 >= 0);
|
4207
|
+
LM_GGML_ASSERT(p1 >= 0);
|
4208
|
+
|
4209
|
+
LM_GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
|
4210
|
+
LM_GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
|
4211
|
+
|
4212
|
+
LM_GGML_ASSERT(lm_ggml_is_contiguous(a));
|
4213
|
+
LM_GGML_ASSERT(a->type == LM_GGML_TYPE_F32);
|
4214
|
+
|
4215
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor_4d(ctx, a->type,
|
4216
|
+
a->ne[0] + p0 + p1,
|
4217
|
+
a->ne[1],
|
4218
|
+
a->ne[2],
|
4219
|
+
a->ne[3]);
|
4220
|
+
|
4221
|
+
int32_t params[] = { p0, p1 };
|
4222
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
4223
|
+
|
4224
|
+
result->op = LM_GGML_OP_PAD_REFLECT_1D;
|
4225
|
+
result->src[0] = a;
|
4226
|
+
|
4227
|
+
return result;
|
4228
|
+
}
|
4229
|
+
|
4113
4230
|
// lm_ggml_arange
|
4114
4231
|
|
4115
4232
|
struct lm_ggml_tensor * lm_ggml_arange(
|
@@ -5967,12 +6084,12 @@ struct lm_ggml_tensor * lm_ggml_graph_get_tensor(const struct lm_ggml_cgraph * c
|
|
5967
6084
|
|
5968
6085
|
struct lm_ggml_tensor * lm_ggml_graph_get_grad(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
|
5969
6086
|
const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
|
5970
|
-
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
|
6087
|
+
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
|
5971
6088
|
}
|
5972
6089
|
|
5973
6090
|
struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
|
5974
6091
|
const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
|
5975
|
-
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
|
6092
|
+
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
|
5976
6093
|
}
|
5977
6094
|
|
5978
6095
|
void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph) {
|
@@ -6284,9 +6401,6 @@ size_t lm_ggml_quantize_chunk(
|
|
6284
6401
|
case LM_GGML_TYPE_IQ1_M: result = quantize_iq1_m (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6285
6402
|
case LM_GGML_TYPE_IQ4_NL: result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6286
6403
|
case LM_GGML_TYPE_IQ4_XS: result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6287
|
-
case LM_GGML_TYPE_Q4_0_4_4: result = quantize_q4_0_4x4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6288
|
-
case LM_GGML_TYPE_Q4_0_4_8: result = quantize_q4_0_4x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6289
|
-
case LM_GGML_TYPE_Q4_0_8_8: result = quantize_q4_0_8x8(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
|
6290
6404
|
case LM_GGML_TYPE_F16:
|
6291
6405
|
{
|
6292
6406
|
size_t elemsize = sizeof(lm_ggml_fp16_t);
|
@@ -6422,7 +6536,7 @@ struct lm_gguf_context {
|
|
6422
6536
|
void * data;
|
6423
6537
|
};
|
6424
6538
|
|
6425
|
-
|
6539
|
+
size_t lm_gguf_type_size(enum lm_gguf_type type) {
|
6426
6540
|
LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
|
6427
6541
|
return LM_GGUF_TYPE_SIZE[type];
|
6428
6542
|
}
|
@@ -6550,13 +6664,7 @@ struct lm_gguf_context * lm_gguf_init_empty(void) {
|
|
6550
6664
|
return ctx;
|
6551
6665
|
}
|
6552
6666
|
|
6553
|
-
struct lm_gguf_context *
|
6554
|
-
FILE * file = lm_ggml_fopen(fname, "rb");
|
6555
|
-
if (!file) {
|
6556
|
-
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
6557
|
-
return NULL;
|
6558
|
-
}
|
6559
|
-
|
6667
|
+
struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
|
6560
6668
|
// offset from start of file
|
6561
6669
|
size_t offset = 0;
|
6562
6670
|
|
@@ -6569,7 +6677,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6569
6677
|
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
6570
6678
|
if (magic[i] != LM_GGUF_MAGIC[i]) {
|
6571
6679
|
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
6572
|
-
fclose(file);
|
6573
6680
|
return NULL;
|
6574
6681
|
}
|
6575
6682
|
}
|
@@ -6580,7 +6687,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6580
6687
|
struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
|
6581
6688
|
if (!ctx) {
|
6582
6689
|
fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
|
6583
|
-
fclose(file);
|
6584
6690
|
return NULL;
|
6585
6691
|
}
|
6586
6692
|
|
@@ -6598,7 +6704,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6598
6704
|
|
6599
6705
|
if (ctx->header.version == 1) {
|
6600
6706
|
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
6601
|
-
fclose(file);
|
6602
6707
|
lm_gguf_free(ctx);
|
6603
6708
|
return NULL;
|
6604
6709
|
}
|
@@ -6611,7 +6716,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6611
6716
|
|
6612
6717
|
if (!ok) {
|
6613
6718
|
fprintf(stderr, "%s: failed to read header\n", __func__);
|
6614
|
-
fclose(file);
|
6615
6719
|
lm_gguf_free(ctx);
|
6616
6720
|
return NULL;
|
6617
6721
|
}
|
@@ -6621,12 +6725,13 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6621
6725
|
{
|
6622
6726
|
const uint64_t n_kv = ctx->header.n_kv;
|
6623
6727
|
|
6624
|
-
|
6625
|
-
|
6626
|
-
|
6627
|
-
|
6628
|
-
|
6629
|
-
|
6728
|
+
if (n_kv > 0) {
|
6729
|
+
ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
|
6730
|
+
if (!ctx->kv) {
|
6731
|
+
fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
|
6732
|
+
lm_gguf_free(ctx);
|
6733
|
+
return NULL;
|
6734
|
+
}
|
6630
6735
|
}
|
6631
6736
|
|
6632
6737
|
for (uint64_t i = 0; i < n_kv; ++i) {
|
@@ -6673,7 +6778,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6673
6778
|
// prevent from integer overflow in the malloc below
|
6674
6779
|
if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
|
6675
6780
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
6676
|
-
fclose(file);
|
6677
6781
|
lm_gguf_free(ctx);
|
6678
6782
|
return NULL;
|
6679
6783
|
}
|
@@ -6681,7 +6785,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6681
6785
|
kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
|
6682
6786
|
if (!kv->value.arr.data) {
|
6683
6787
|
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
6684
|
-
fclose(file);
|
6685
6788
|
lm_gguf_free(ctx);
|
6686
6789
|
return NULL;
|
6687
6790
|
}
|
@@ -6693,7 +6796,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6693
6796
|
// prevent from integer overflow in the malloc below
|
6694
6797
|
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
|
6695
6798
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
6696
|
-
fclose(file);
|
6697
6799
|
lm_gguf_free(ctx);
|
6698
6800
|
return NULL;
|
6699
6801
|
}
|
@@ -6701,7 +6803,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6701
6803
|
kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
|
6702
6804
|
if (!kv->value.arr.data) {
|
6703
6805
|
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
6704
|
-
fclose(file);
|
6705
6806
|
lm_gguf_free(ctx);
|
6706
6807
|
return NULL;
|
6707
6808
|
}
|
@@ -6732,7 +6833,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6732
6833
|
|
6733
6834
|
if (!ok) {
|
6734
6835
|
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
6735
|
-
fclose(file);
|
6736
6836
|
lm_gguf_free(ctx);
|
6737
6837
|
return NULL;
|
6738
6838
|
}
|
@@ -6743,7 +6843,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6743
6843
|
ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
|
6744
6844
|
if (!ctx->infos) {
|
6745
6845
|
fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
|
6746
|
-
fclose(file);
|
6747
6846
|
lm_gguf_free(ctx);
|
6748
6847
|
return NULL;
|
6749
6848
|
}
|
@@ -6779,7 +6878,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6779
6878
|
|
6780
6879
|
if (!ok) {
|
6781
6880
|
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
6782
|
-
fclose(file);
|
6783
6881
|
lm_gguf_free(ctx);
|
6784
6882
|
return NULL;
|
6785
6883
|
}
|
@@ -6818,10 +6916,17 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6818
6916
|
(int64_t) info->ne[2] *
|
6819
6917
|
(int64_t) info->ne[3];
|
6820
6918
|
|
6821
|
-
if (lm_ggml_blck_size(info->type) == 0
|
6919
|
+
if (lm_ggml_blck_size(info->type) == 0 ) {
|
6920
|
+
// this tensor type support have been removed:
|
6921
|
+
fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
|
6922
|
+
__func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
|
6923
|
+
lm_gguf_free(ctx);
|
6924
|
+
return NULL;
|
6925
|
+
}
|
6926
|
+
|
6927
|
+
if (ne % lm_ggml_blck_size(info->type) != 0) {
|
6822
6928
|
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
6823
6929
|
__func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
|
6824
|
-
fclose(file);
|
6825
6930
|
lm_gguf_free(ctx);
|
6826
6931
|
return NULL;
|
6827
6932
|
}
|
@@ -6853,7 +6958,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6853
6958
|
*params.ctx = lm_ggml_init(pdata);
|
6854
6959
|
if (*params.ctx == NULL) {
|
6855
6960
|
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
6856
|
-
fclose(file);
|
6857
6961
|
lm_gguf_free(ctx);
|
6858
6962
|
return NULL;
|
6859
6963
|
}
|
@@ -6872,7 +6976,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6872
6976
|
|
6873
6977
|
if (!ok) {
|
6874
6978
|
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
6875
|
-
fclose(file);
|
6876
6979
|
lm_ggml_free(ctx_data);
|
6877
6980
|
lm_gguf_free(ctx);
|
6878
6981
|
return NULL;
|
@@ -6911,7 +7014,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6911
7014
|
|
6912
7015
|
if (!ok) {
|
6913
7016
|
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
6914
|
-
fclose(file);
|
6915
7017
|
lm_ggml_free(ctx_data);
|
6916
7018
|
lm_gguf_free(ctx);
|
6917
7019
|
return NULL;
|
@@ -6920,11 +7022,21 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6920
7022
|
lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
|
6921
7023
|
}
|
6922
7024
|
|
6923
|
-
fclose(file);
|
6924
|
-
|
6925
7025
|
return ctx;
|
6926
7026
|
}
|
6927
7027
|
|
7028
|
+
struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
|
7029
|
+
FILE * file = lm_ggml_fopen(fname, "rb");
|
7030
|
+
if (!file) {
|
7031
|
+
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
7032
|
+
return NULL;
|
7033
|
+
}
|
7034
|
+
|
7035
|
+
struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
|
7036
|
+
fclose(file);
|
7037
|
+
return result;
|
7038
|
+
}
|
7039
|
+
|
6928
7040
|
void lm_gguf_free(struct lm_gguf_context * ctx) {
|
6929
7041
|
if (ctx == NULL) {
|
6930
7042
|
return;
|
@@ -7384,13 +7496,7 @@ void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, co
|
|
7384
7496
|
// fwrite(val, sizeof(char), size, file);
|
7385
7497
|
//}
|
7386
7498
|
|
7387
|
-
struct lm_gguf_buf {
|
7388
|
-
void * data;
|
7389
|
-
size_t size;
|
7390
|
-
size_t offset;
|
7391
|
-
};
|
7392
|
-
|
7393
|
-
static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
7499
|
+
struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
7394
7500
|
struct lm_gguf_buf buf = {
|
7395
7501
|
/*buf.data =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
|
7396
7502
|
/*buf.size =*/ size,
|
@@ -7400,7 +7506,7 @@ static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
|
7400
7506
|
return buf;
|
7401
7507
|
}
|
7402
7508
|
|
7403
|
-
|
7509
|
+
void lm_gguf_buf_free(struct lm_gguf_buf buf) {
|
7404
7510
|
if (buf.data) {
|
7405
7511
|
LM_GGML_FREE(buf.data);
|
7406
7512
|
}
|
@@ -7438,7 +7544,7 @@ static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t
|
|
7438
7544
|
buf->offset += el_size;
|
7439
7545
|
}
|
7440
7546
|
|
7441
|
-
|
7547
|
+
void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
|
7442
7548
|
// write header
|
7443
7549
|
lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
|
7444
7550
|
lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
|