cui-llama.rn 1.3.4 → 1.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cpp/common.cpp +7 -4
- package/cpp/common.h +14 -2
- package/cpp/ggml-alloc.c +0 -1
- package/cpp/ggml-backend-reg.cpp +74 -49
- package/cpp/ggml-cpu-aarch64.cpp +51 -71
- package/cpp/ggml-cpu.c +6 -6
- package/cpp/ggml-cpu.cpp +9 -0
- package/cpp/ggml-impl.h +16 -0
- package/cpp/ggml.c +153 -136
- package/cpp/ggml.h +29 -12
- package/cpp/llama-grammar.cpp +15 -15
- package/cpp/llama-grammar.h +2 -5
- package/cpp/llama-vocab.cpp +5 -1
- package/cpp/llama-vocab.h +1 -1
- package/cpp/llama.cpp +992 -300
- package/cpp/llama.h +0 -3
- package/cpp/sgemm.cpp +265 -258
- package/cpp/sgemm.h +2 -2
- package/package.json +1 -1
package/cpp/ggml.c
CHANGED
@@ -3773,13 +3773,84 @@ struct lm_ggml_tensor * lm_ggml_clamp(
|
|
3773
3773
|
return result;
|
3774
3774
|
}
|
3775
3775
|
|
3776
|
-
// lm_ggml_conv_1d
|
3777
|
-
|
3778
3776
|
static int64_t lm_ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
|
3779
3777
|
return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
|
3780
3778
|
}
|
3781
3779
|
|
3782
|
-
|
3780
|
+
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
|
3781
|
+
// a: [OC,IC, KH, KW]
|
3782
|
+
// b: [N, IC, IH, IW]
|
3783
|
+
// result: [N, OH, OW, IC*KH*KW]
|
3784
|
+
struct lm_ggml_tensor * lm_ggml_im2col(
|
3785
|
+
struct lm_ggml_context * ctx,
|
3786
|
+
struct lm_ggml_tensor * a,
|
3787
|
+
struct lm_ggml_tensor * b,
|
3788
|
+
int s0,
|
3789
|
+
int s1,
|
3790
|
+
int p0,
|
3791
|
+
int p1,
|
3792
|
+
int d0,
|
3793
|
+
int d1,
|
3794
|
+
bool is_2D,
|
3795
|
+
enum lm_ggml_type dst_type) {
|
3796
|
+
if (is_2D) {
|
3797
|
+
LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
|
3798
|
+
} else {
|
3799
|
+
//LM_GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
|
3800
|
+
LM_GGML_ASSERT(b->ne[1] == a->ne[1]);
|
3801
|
+
LM_GGML_ASSERT(b->ne[3] == 1);
|
3802
|
+
}
|
3803
|
+
|
3804
|
+
const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
3805
|
+
const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
3806
|
+
|
3807
|
+
LM_GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
|
3808
|
+
LM_GGML_ASSERT((OW > 0) && "b too small compared to a");
|
3809
|
+
|
3810
|
+
const int64_t ne[4] = {
|
3811
|
+
is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
|
3812
|
+
OW,
|
3813
|
+
is_2D ? OH : b->ne[2],
|
3814
|
+
is_2D ? b->ne[3] : 1,
|
3815
|
+
};
|
3816
|
+
|
3817
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, dst_type, 4, ne);
|
3818
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3819
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
3820
|
+
|
3821
|
+
result->op = LM_GGML_OP_IM2COL;
|
3822
|
+
result->src[0] = a;
|
3823
|
+
result->src[1] = b;
|
3824
|
+
|
3825
|
+
return result;
|
3826
|
+
}
|
3827
|
+
|
3828
|
+
struct lm_ggml_tensor * lm_ggml_im2col_back(
|
3829
|
+
struct lm_ggml_context * ctx,
|
3830
|
+
struct lm_ggml_tensor * a,
|
3831
|
+
struct lm_ggml_tensor * b,
|
3832
|
+
int64_t * ne,
|
3833
|
+
int s0,
|
3834
|
+
int s1,
|
3835
|
+
int p0,
|
3836
|
+
int p1,
|
3837
|
+
int d0,
|
3838
|
+
int d1,
|
3839
|
+
bool is_2D) {
|
3840
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3841
|
+
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3842
|
+
lm_ggml_set_op_params(result, params, sizeof(params));
|
3843
|
+
|
3844
|
+
result->op = LM_GGML_OP_IM2COL_BACK;
|
3845
|
+
result->src[0] = a;
|
3846
|
+
result->src[1] = b;
|
3847
|
+
|
3848
|
+
return result;
|
3849
|
+
}
|
3850
|
+
|
3851
|
+
// lm_ggml_conv_1d
|
3852
|
+
|
3853
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d(
|
3783
3854
|
struct lm_ggml_context * ctx,
|
3784
3855
|
struct lm_ggml_tensor * a,
|
3785
3856
|
struct lm_ggml_tensor * b,
|
@@ -3809,137 +3880,75 @@ struct lm_ggml_tensor* lm_ggml_conv_1d_ph(
|
|
3809
3880
|
return lm_ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
|
3810
3881
|
}
|
3811
3882
|
|
3812
|
-
//
|
3883
|
+
// lm_ggml_conv_1d_dw
|
3813
3884
|
|
3814
|
-
|
3815
|
-
return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
|
3816
|
-
}
|
3817
|
-
|
3818
|
-
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
|
3885
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
|
3819
3886
|
struct lm_ggml_context * ctx,
|
3820
3887
|
struct lm_ggml_tensor * a,
|
3821
3888
|
struct lm_ggml_tensor * b,
|
3822
3889
|
int s0,
|
3823
3890
|
int p0,
|
3824
3891
|
int d0) {
|
3825
|
-
|
3826
|
-
|
3827
|
-
LM_GGML_ASSERT(a->ne[3] == 1);
|
3892
|
+
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], 1, a->ne[1], a->ne[2]);
|
3893
|
+
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
|
3828
3894
|
|
3829
|
-
|
3830
|
-
LM_GGML_ASSERT(d0 == 1);
|
3895
|
+
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a, new_b, s0, 0, p0, 0, d0, 0, false, LM_GGML_TYPE_F16);
|
3831
3896
|
|
3832
|
-
|
3833
|
-
lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
|
3834
|
-
a->ne[1], b->ne[2], 1,
|
3835
|
-
};
|
3836
|
-
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3897
|
+
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, im2col, a);
|
3837
3898
|
|
3838
|
-
|
3839
|
-
lm_ggml_set_op_params(result, params, sizeof(params));
|
3840
|
-
|
3841
|
-
result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
|
3842
|
-
result->src[0] = a;
|
3843
|
-
result->src[1] = b;
|
3899
|
+
result = lm_ggml_reshape_3d(ctx, result, b->ne[0], b->ne[1], 1);
|
3844
3900
|
|
3845
3901
|
return result;
|
3846
3902
|
}
|
3847
3903
|
|
3848
|
-
//
|
3904
|
+
// lm_ggml_conv_1d_dw_ph
|
3849
3905
|
|
3850
|
-
struct lm_ggml_tensor *
|
3906
|
+
struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
|
3851
3907
|
struct lm_ggml_context * ctx,
|
3852
3908
|
struct lm_ggml_tensor * a,
|
3853
3909
|
struct lm_ggml_tensor * b,
|
3854
3910
|
int s0,
|
3855
|
-
int
|
3856
|
-
|
3857
|
-
|
3858
|
-
int d0,
|
3859
|
-
int d1) {
|
3860
|
-
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
3861
|
-
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
|
3862
|
-
lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
3863
|
-
s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
3864
|
-
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
3911
|
+
int d0) {
|
3912
|
+
return lm_ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
|
3913
|
+
}
|
3865
3914
|
|
3866
|
-
|
3867
|
-
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
|
3868
|
-
result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
3915
|
+
// lm_ggml_conv_transpose_1d
|
3869
3916
|
|
3870
|
-
|
3917
|
+
static int64_t lm_ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
|
3918
|
+
return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
|
3871
3919
|
}
|
3872
|
-
// lm_ggml_conv_2d
|
3873
3920
|
|
3874
|
-
|
3875
|
-
// a: [OC,IC, KH, KW]
|
3876
|
-
// b: [N, IC, IH, IW]
|
3877
|
-
// result: [N, OH, OW, IC*KH*KW]
|
3878
|
-
struct lm_ggml_tensor * lm_ggml_im2col(
|
3921
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
|
3879
3922
|
struct lm_ggml_context * ctx,
|
3880
3923
|
struct lm_ggml_tensor * a,
|
3881
3924
|
struct lm_ggml_tensor * b,
|
3882
3925
|
int s0,
|
3883
|
-
int s1,
|
3884
3926
|
int p0,
|
3885
|
-
int
|
3886
|
-
|
3887
|
-
|
3888
|
-
|
3889
|
-
enum lm_ggml_type dst_type) {
|
3890
|
-
if(is_2D) {
|
3891
|
-
LM_GGML_ASSERT(a->ne[2] == b->ne[2]);
|
3892
|
-
} else {
|
3893
|
-
LM_GGML_ASSERT(a->ne[1] == b->ne[1]);
|
3894
|
-
LM_GGML_ASSERT(b->ne[3] == 1);
|
3895
|
-
}
|
3896
|
-
|
3897
|
-
const int64_t OH = is_2D ? lm_ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
|
3898
|
-
const int64_t OW = lm_ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
|
3927
|
+
int d0) {
|
3928
|
+
LM_GGML_ASSERT(lm_ggml_is_matrix(b));
|
3929
|
+
LM_GGML_ASSERT(a->ne[2] == b->ne[1]);
|
3930
|
+
LM_GGML_ASSERT(a->ne[3] == 1);
|
3899
3931
|
|
3900
|
-
LM_GGML_ASSERT(
|
3901
|
-
LM_GGML_ASSERT(
|
3932
|
+
LM_GGML_ASSERT(p0 == 0);
|
3933
|
+
LM_GGML_ASSERT(d0 == 1);
|
3902
3934
|
|
3903
3935
|
const int64_t ne[4] = {
|
3904
|
-
|
3905
|
-
|
3906
|
-
is_2D ? OH : b->ne[2],
|
3907
|
-
is_2D ? b->ne[3] : 1,
|
3936
|
+
lm_ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
|
3937
|
+
a->ne[1], b->ne[2], 1,
|
3908
3938
|
};
|
3939
|
+
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3909
3940
|
|
3910
|
-
|
3911
|
-
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3941
|
+
int32_t params[] = { s0, p0, d0 };
|
3912
3942
|
lm_ggml_set_op_params(result, params, sizeof(params));
|
3913
3943
|
|
3914
|
-
result->op =
|
3944
|
+
result->op = LM_GGML_OP_CONV_TRANSPOSE_1D;
|
3915
3945
|
result->src[0] = a;
|
3916
3946
|
result->src[1] = b;
|
3917
3947
|
|
3918
3948
|
return result;
|
3919
3949
|
}
|
3920
3950
|
|
3921
|
-
|
3922
|
-
struct lm_ggml_context * ctx,
|
3923
|
-
struct lm_ggml_tensor * a,
|
3924
|
-
struct lm_ggml_tensor * b,
|
3925
|
-
int64_t * ne,
|
3926
|
-
int s0,
|
3927
|
-
int s1,
|
3928
|
-
int p0,
|
3929
|
-
int p1,
|
3930
|
-
int d0,
|
3931
|
-
int d1,
|
3932
|
-
bool is_2D) {
|
3933
|
-
struct lm_ggml_tensor * result = lm_ggml_new_tensor(ctx, LM_GGML_TYPE_F32, 4, ne);
|
3934
|
-
int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
|
3935
|
-
lm_ggml_set_op_params(result, params, sizeof(params));
|
3936
|
-
|
3937
|
-
result->op = LM_GGML_OP_IM2COL_BACK;
|
3938
|
-
result->src[0] = a;
|
3939
|
-
result->src[1] = b;
|
3940
|
-
|
3941
|
-
return result;
|
3942
|
-
}
|
3951
|
+
// lm_ggml_conv_2d
|
3943
3952
|
|
3944
3953
|
// a: [OC,IC, KH, KW]
|
3945
3954
|
// b: [N, IC, IH, IW]
|
@@ -3986,6 +3995,31 @@ struct lm_ggml_tensor * lm_ggml_conv_2d_s1_ph(
|
|
3986
3995
|
return lm_ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
|
3987
3996
|
}
|
3988
3997
|
|
3998
|
+
// lm_ggml_conv_2d_dw
|
3999
|
+
|
4000
|
+
struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
|
4001
|
+
struct lm_ggml_context * ctx,
|
4002
|
+
struct lm_ggml_tensor * a,
|
4003
|
+
struct lm_ggml_tensor * b,
|
4004
|
+
int s0,
|
4005
|
+
int s1,
|
4006
|
+
int p0,
|
4007
|
+
int p1,
|
4008
|
+
int d0,
|
4009
|
+
int d1) {
|
4010
|
+
struct lm_ggml_tensor * new_a = lm_ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
|
4011
|
+
struct lm_ggml_tensor * im2col = lm_ggml_im2col(ctx, new_a,
|
4012
|
+
lm_ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
|
4013
|
+
s0, s1, p0, p1, d0, d1, true, LM_GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
|
4014
|
+
struct lm_ggml_tensor * new_b = lm_ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
|
4015
|
+
|
4016
|
+
new_a = lm_ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2], new_a->ne[3], 1); // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
|
4017
|
+
struct lm_ggml_tensor * result = lm_ggml_mul_mat(ctx, new_a, new_b);
|
4018
|
+
result = lm_ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
|
4019
|
+
|
4020
|
+
return result;
|
4021
|
+
}
|
4022
|
+
|
3989
4023
|
// lm_ggml_conv_transpose_2d_p0
|
3990
4024
|
|
3991
4025
|
static int64_t lm_ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
|
@@ -6050,12 +6084,12 @@ struct lm_ggml_tensor * lm_ggml_graph_get_tensor(const struct lm_ggml_cgraph * c
|
|
6050
6084
|
|
6051
6085
|
struct lm_ggml_tensor * lm_ggml_graph_get_grad(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
|
6052
6086
|
const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
|
6053
|
-
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grads[igrad] : NULL;
|
6087
|
+
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
|
6054
6088
|
}
|
6055
6089
|
|
6056
6090
|
struct lm_ggml_tensor * lm_ggml_graph_get_grad_acc(const struct lm_ggml_cgraph * cgraph, const struct lm_ggml_tensor * node) {
|
6057
6091
|
const size_t igrad = lm_ggml_hash_find(&cgraph->visited_hash_set, node);
|
6058
|
-
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) ? cgraph->grad_accs[igrad] : NULL;
|
6092
|
+
return igrad != LM_GGML_HASHSET_FULL && lm_ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
|
6059
6093
|
}
|
6060
6094
|
|
6061
6095
|
void lm_ggml_graph_print(const struct lm_ggml_cgraph * cgraph) {
|
@@ -6502,7 +6536,7 @@ struct lm_gguf_context {
|
|
6502
6536
|
void * data;
|
6503
6537
|
};
|
6504
6538
|
|
6505
|
-
|
6539
|
+
size_t lm_gguf_type_size(enum lm_gguf_type type) {
|
6506
6540
|
LM_GGML_ASSERT(0 <= type && type < LM_GGUF_TYPE_COUNT);
|
6507
6541
|
return LM_GGUF_TYPE_SIZE[type];
|
6508
6542
|
}
|
@@ -6630,13 +6664,7 @@ struct lm_gguf_context * lm_gguf_init_empty(void) {
|
|
6630
6664
|
return ctx;
|
6631
6665
|
}
|
6632
6666
|
|
6633
|
-
struct lm_gguf_context *
|
6634
|
-
FILE * file = lm_ggml_fopen(fname, "rb");
|
6635
|
-
if (!file) {
|
6636
|
-
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
6637
|
-
return NULL;
|
6638
|
-
}
|
6639
|
-
|
6667
|
+
struct lm_gguf_context * lm_gguf_init_from_file_impl(FILE * file, struct lm_gguf_init_params params) {
|
6640
6668
|
// offset from start of file
|
6641
6669
|
size_t offset = 0;
|
6642
6670
|
|
@@ -6649,7 +6677,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6649
6677
|
for (uint32_t i = 0; i < sizeof(magic); i++) {
|
6650
6678
|
if (magic[i] != LM_GGUF_MAGIC[i]) {
|
6651
6679
|
fprintf(stderr, "%s: invalid magic characters '%c%c%c%c'\n", __func__, magic[0], magic[1], magic[2], magic[3]);
|
6652
|
-
fclose(file);
|
6653
6680
|
return NULL;
|
6654
6681
|
}
|
6655
6682
|
}
|
@@ -6660,7 +6687,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6660
6687
|
struct lm_gguf_context * ctx = calloc(1, sizeof(struct lm_gguf_context));
|
6661
6688
|
if (!ctx) {
|
6662
6689
|
fprintf(stderr, "%s: failed to allocate memory for context\n", __func__);
|
6663
|
-
fclose(file);
|
6664
6690
|
return NULL;
|
6665
6691
|
}
|
6666
6692
|
|
@@ -6678,7 +6704,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6678
6704
|
|
6679
6705
|
if (ctx->header.version == 1) {
|
6680
6706
|
fprintf(stderr, "%s: GGUFv1 is no longer supported. please use a more up-to-date version\n", __func__);
|
6681
|
-
fclose(file);
|
6682
6707
|
lm_gguf_free(ctx);
|
6683
6708
|
return NULL;
|
6684
6709
|
}
|
@@ -6691,7 +6716,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6691
6716
|
|
6692
6717
|
if (!ok) {
|
6693
6718
|
fprintf(stderr, "%s: failed to read header\n", __func__);
|
6694
|
-
fclose(file);
|
6695
6719
|
lm_gguf_free(ctx);
|
6696
6720
|
return NULL;
|
6697
6721
|
}
|
@@ -6701,12 +6725,13 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6701
6725
|
{
|
6702
6726
|
const uint64_t n_kv = ctx->header.n_kv;
|
6703
6727
|
|
6704
|
-
|
6705
|
-
|
6706
|
-
|
6707
|
-
|
6708
|
-
|
6709
|
-
|
6728
|
+
if (n_kv > 0) {
|
6729
|
+
ctx->kv = calloc(n_kv, sizeof(struct lm_gguf_kv));
|
6730
|
+
if (!ctx->kv) {
|
6731
|
+
fprintf(stderr, "%s: failed to allocate memory for kv pairs\n", __func__);
|
6732
|
+
lm_gguf_free(ctx);
|
6733
|
+
return NULL;
|
6734
|
+
}
|
6710
6735
|
}
|
6711
6736
|
|
6712
6737
|
for (uint64_t i = 0; i < n_kv; ++i) {
|
@@ -6753,7 +6778,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6753
6778
|
// prevent from integer overflow in the malloc below
|
6754
6779
|
if (kv->value.arr.n >= SIZE_MAX/lm_gguf_type_size(kv->value.arr.type)) {
|
6755
6780
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
6756
|
-
fclose(file);
|
6757
6781
|
lm_gguf_free(ctx);
|
6758
6782
|
return NULL;
|
6759
6783
|
}
|
@@ -6761,7 +6785,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6761
6785
|
kv->value.arr.data = calloc(kv->value.arr.n, lm_gguf_type_size(kv->value.arr.type));
|
6762
6786
|
if (!kv->value.arr.data) {
|
6763
6787
|
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
6764
|
-
fclose(file);
|
6765
6788
|
lm_gguf_free(ctx);
|
6766
6789
|
return NULL;
|
6767
6790
|
}
|
@@ -6773,7 +6796,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6773
6796
|
// prevent from integer overflow in the malloc below
|
6774
6797
|
if (kv->value.arr.n >= SIZE_MAX/sizeof(struct lm_gguf_str)) {
|
6775
6798
|
fprintf(stderr, "%s: array size is too large (%" PRIu64 ")\n", __func__, kv->value.arr.n);
|
6776
|
-
fclose(file);
|
6777
6799
|
lm_gguf_free(ctx);
|
6778
6800
|
return NULL;
|
6779
6801
|
}
|
@@ -6781,7 +6803,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6781
6803
|
kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct lm_gguf_str));
|
6782
6804
|
if (!kv->value.arr.data) {
|
6783
6805
|
fprintf(stderr, "%s: failed to allocate memory for array\n", __func__);
|
6784
|
-
fclose(file);
|
6785
6806
|
lm_gguf_free(ctx);
|
6786
6807
|
return NULL;
|
6787
6808
|
}
|
@@ -6812,7 +6833,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6812
6833
|
|
6813
6834
|
if (!ok) {
|
6814
6835
|
fprintf(stderr, "%s: failed to read key-value pairs\n", __func__);
|
6815
|
-
fclose(file);
|
6816
6836
|
lm_gguf_free(ctx);
|
6817
6837
|
return NULL;
|
6818
6838
|
}
|
@@ -6823,7 +6843,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6823
6843
|
ctx->infos = calloc(ctx->header.n_tensors, sizeof(struct lm_gguf_tensor_info));
|
6824
6844
|
if (!ctx->infos) {
|
6825
6845
|
fprintf(stderr, "%s: failed to allocate memory for tensor infos\n", __func__);
|
6826
|
-
fclose(file);
|
6827
6846
|
lm_gguf_free(ctx);
|
6828
6847
|
return NULL;
|
6829
6848
|
}
|
@@ -6859,7 +6878,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6859
6878
|
|
6860
6879
|
if (!ok) {
|
6861
6880
|
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
|
6862
|
-
fclose(file);
|
6863
6881
|
lm_gguf_free(ctx);
|
6864
6882
|
return NULL;
|
6865
6883
|
}
|
@@ -6902,7 +6920,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6902
6920
|
// this tensor type support have been removed:
|
6903
6921
|
fprintf(stderr, "%s: tensor '%s' of type %d: %s\n",
|
6904
6922
|
__func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type));
|
6905
|
-
fclose(file);
|
6906
6923
|
lm_gguf_free(ctx);
|
6907
6924
|
return NULL;
|
6908
6925
|
}
|
@@ -6910,7 +6927,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6910
6927
|
if (ne % lm_ggml_blck_size(info->type) != 0) {
|
6911
6928
|
fprintf(stderr, "%s: tensor '%s' of type %d (%s) number of elements (%" PRId64 ") is not a multiple of block size (%" PRId64 ")\n",
|
6912
6929
|
__func__, info->name.data, (int) info->type, lm_ggml_type_name(info->type), ne, lm_ggml_blck_size(info->type));
|
6913
|
-
fclose(file);
|
6914
6930
|
lm_gguf_free(ctx);
|
6915
6931
|
return NULL;
|
6916
6932
|
}
|
@@ -6942,7 +6958,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6942
6958
|
*params.ctx = lm_ggml_init(pdata);
|
6943
6959
|
if (*params.ctx == NULL) {
|
6944
6960
|
fprintf(stderr, "%s: failed to initialize context\n", __func__);
|
6945
|
-
fclose(file);
|
6946
6961
|
lm_gguf_free(ctx);
|
6947
6962
|
return NULL;
|
6948
6963
|
}
|
@@ -6961,7 +6976,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
6961
6976
|
|
6962
6977
|
if (!ok) {
|
6963
6978
|
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
|
6964
|
-
fclose(file);
|
6965
6979
|
lm_ggml_free(ctx_data);
|
6966
6980
|
lm_gguf_free(ctx);
|
6967
6981
|
return NULL;
|
@@ -7000,7 +7014,6 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
7000
7014
|
|
7001
7015
|
if (!ok) {
|
7002
7016
|
fprintf(stderr, "%s: failed to read the tensor data\n", __func__);
|
7003
|
-
fclose(file);
|
7004
7017
|
lm_ggml_free(ctx_data);
|
7005
7018
|
lm_gguf_free(ctx);
|
7006
7019
|
return NULL;
|
@@ -7009,11 +7022,21 @@ struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gg
|
|
7009
7022
|
lm_ggml_set_no_alloc(ctx_data, params.no_alloc);
|
7010
7023
|
}
|
7011
7024
|
|
7012
|
-
fclose(file);
|
7013
|
-
|
7014
7025
|
return ctx;
|
7015
7026
|
}
|
7016
7027
|
|
7028
|
+
struct lm_gguf_context * lm_gguf_init_from_file(const char * fname, struct lm_gguf_init_params params) {
|
7029
|
+
FILE * file = lm_ggml_fopen(fname, "rb");
|
7030
|
+
if (!file) {
|
7031
|
+
fprintf(stderr, "%s: failed to open '%s': '%s'\n", __func__, fname, strerror(errno));
|
7032
|
+
return NULL;
|
7033
|
+
}
|
7034
|
+
|
7035
|
+
struct lm_gguf_context * result = lm_gguf_init_from_file_impl(file, params);
|
7036
|
+
fclose(file);
|
7037
|
+
return result;
|
7038
|
+
}
|
7039
|
+
|
7017
7040
|
void lm_gguf_free(struct lm_gguf_context * ctx) {
|
7018
7041
|
if (ctx == NULL) {
|
7019
7042
|
return;
|
@@ -7473,13 +7496,7 @@ void lm_gguf_set_tensor_data(struct lm_gguf_context * ctx, const char * name, co
|
|
7473
7496
|
// fwrite(val, sizeof(char), size, file);
|
7474
7497
|
//}
|
7475
7498
|
|
7476
|
-
struct lm_gguf_buf {
|
7477
|
-
void * data;
|
7478
|
-
size_t size;
|
7479
|
-
size_t offset;
|
7480
|
-
};
|
7481
|
-
|
7482
|
-
static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
7499
|
+
struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
7483
7500
|
struct lm_gguf_buf buf = {
|
7484
7501
|
/*buf.data =*/ size == 0 ? NULL : LM_GGML_CALLOC(1, size),
|
7485
7502
|
/*buf.size =*/ size,
|
@@ -7489,7 +7506,7 @@ static struct lm_gguf_buf lm_gguf_buf_init(size_t size) {
|
|
7489
7506
|
return buf;
|
7490
7507
|
}
|
7491
7508
|
|
7492
|
-
|
7509
|
+
void lm_gguf_buf_free(struct lm_gguf_buf buf) {
|
7493
7510
|
if (buf.data) {
|
7494
7511
|
LM_GGML_FREE(buf.data);
|
7495
7512
|
}
|
@@ -7527,7 +7544,7 @@ static void lm_gguf_bwrite_el(struct lm_gguf_buf * buf, const void * val, size_t
|
|
7527
7544
|
buf->offset += el_size;
|
7528
7545
|
}
|
7529
7546
|
|
7530
|
-
|
7547
|
+
void lm_gguf_write_to_buf(const struct lm_gguf_context * ctx, struct lm_gguf_buf * buf, bool only_meta) {
|
7531
7548
|
// write header
|
7532
7549
|
lm_gguf_bwrite_el(buf, &ctx->header.magic, sizeof(ctx->header.magic));
|
7533
7550
|
lm_gguf_bwrite_el(buf, &ctx->header.version, sizeof(ctx->header.version));
|
package/cpp/ggml.h
CHANGED
@@ -1565,17 +1565,6 @@ extern "C" {
|
|
1565
1565
|
int d1, // dilation dimension 1
|
1566
1566
|
bool is_2D);
|
1567
1567
|
|
1568
|
-
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_depthwise_2d(
|
1569
|
-
struct lm_ggml_context * ctx,
|
1570
|
-
struct lm_ggml_tensor * a, // convolution kernel
|
1571
|
-
struct lm_ggml_tensor * b, // data
|
1572
|
-
int s0, // stride dimension 0
|
1573
|
-
int s1, // stride dimension 1
|
1574
|
-
int p0, // padding dimension 0
|
1575
|
-
int p1, // padding dimension 1
|
1576
|
-
int d0, // dilation dimension 0
|
1577
|
-
int d1); // dilation dimension 1
|
1578
|
-
|
1579
1568
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d(
|
1580
1569
|
struct lm_ggml_context * ctx,
|
1581
1570
|
struct lm_ggml_tensor * a, // convolution kernel
|
@@ -1593,6 +1582,23 @@ extern "C" {
|
|
1593
1582
|
int s, // stride
|
1594
1583
|
int d); // dilation
|
1595
1584
|
|
1585
|
+
// depthwise
|
1586
|
+
// TODO: this is very likely wrong for some cases! - needs more testing
|
1587
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw(
|
1588
|
+
struct lm_ggml_context * ctx,
|
1589
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1590
|
+
struct lm_ggml_tensor * b, // data
|
1591
|
+
int s0, // stride
|
1592
|
+
int p0, // padding
|
1593
|
+
int d0); // dilation
|
1594
|
+
|
1595
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_1d_dw_ph(
|
1596
|
+
struct lm_ggml_context * ctx,
|
1597
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1598
|
+
struct lm_ggml_tensor * b, // data
|
1599
|
+
int s0, // stride
|
1600
|
+
int d0); // dilation
|
1601
|
+
|
1596
1602
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_1d(
|
1597
1603
|
struct lm_ggml_context * ctx,
|
1598
1604
|
struct lm_ggml_tensor * a, // convolution kernel
|
@@ -1612,7 +1618,6 @@ extern "C" {
|
|
1612
1618
|
int d0, // dilation dimension 0
|
1613
1619
|
int d1); // dilation dimension 1
|
1614
1620
|
|
1615
|
-
|
1616
1621
|
// kernel size is a->ne[0] x a->ne[1]
|
1617
1622
|
// stride is equal to kernel size
|
1618
1623
|
// padding is zero
|
@@ -1639,6 +1644,18 @@ extern "C" {
|
|
1639
1644
|
struct lm_ggml_tensor * a,
|
1640
1645
|
struct lm_ggml_tensor * b);
|
1641
1646
|
|
1647
|
+
// depthwise
|
1648
|
+
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_2d_dw(
|
1649
|
+
struct lm_ggml_context * ctx,
|
1650
|
+
struct lm_ggml_tensor * a, // convolution kernel
|
1651
|
+
struct lm_ggml_tensor * b, // data
|
1652
|
+
int s0, // stride dimension 0
|
1653
|
+
int s1, // stride dimension 1
|
1654
|
+
int p0, // padding dimension 0
|
1655
|
+
int p1, // padding dimension 1
|
1656
|
+
int d0, // dilation dimension 0
|
1657
|
+
int d1); // dilation dimension 1
|
1658
|
+
|
1642
1659
|
LM_GGML_API struct lm_ggml_tensor * lm_ggml_conv_transpose_2d_p0(
|
1643
1660
|
struct lm_ggml_context * ctx,
|
1644
1661
|
struct lm_ggml_tensor * a,
|
package/cpp/llama-grammar.cpp
CHANGED
@@ -822,15 +822,11 @@ llama_grammar_stacks & llama_grammar_get_stacks(struct llama_grammar * grammar)
|
|
822
822
|
return grammar->stacks;
|
823
823
|
}
|
824
824
|
|
825
|
-
void llama_grammar_accept(
|
826
|
-
|
827
|
-
|
828
|
-
const uint32_t chr,
|
829
|
-
llama_grammar_stacks & stacks_new) {
|
830
|
-
stacks_new.clear();
|
831
|
-
stacks_new.reserve(stacks.size());
|
825
|
+
void llama_grammar_accept(struct llama_grammar * grammar, uint32_t chr) {
|
826
|
+
llama_grammar_stacks stacks_new;
|
827
|
+
stacks_new.reserve(grammar->stacks.size());
|
832
828
|
|
833
|
-
for (const auto & stack : stacks) {
|
829
|
+
for (const auto & stack : grammar->stacks) {
|
834
830
|
if (stack.empty()) {
|
835
831
|
continue;
|
836
832
|
}
|
@@ -844,9 +840,11 @@ void llama_grammar_accept(
|
|
844
840
|
if (!llama_grammar_is_end_of_sequence(pos)) {
|
845
841
|
new_stack.push_back(pos);
|
846
842
|
}
|
847
|
-
llama_grammar_advance_stack(rules, new_stack, stacks_new);
|
843
|
+
llama_grammar_advance_stack(grammar->rules, new_stack, stacks_new);
|
848
844
|
}
|
849
845
|
}
|
846
|
+
|
847
|
+
grammar->stacks = std::move(stacks_new);
|
850
848
|
}
|
851
849
|
|
852
850
|
llama_grammar_candidates llama_grammar_reject_candidates_for_stack(
|
@@ -1051,7 +1049,12 @@ void llama_grammar_free_impl(struct llama_grammar * grammar) {
|
|
1051
1049
|
}
|
1052
1050
|
|
1053
1051
|
struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & grammar) {
|
1054
|
-
llama_grammar * result = new llama_grammar {
|
1052
|
+
llama_grammar * result = new llama_grammar {
|
1053
|
+
grammar.vocab,
|
1054
|
+
grammar.rules,
|
1055
|
+
grammar.stacks,
|
1056
|
+
grammar.partial_utf8,
|
1057
|
+
};
|
1055
1058
|
|
1056
1059
|
// redirect elements in stacks to point to new rules
|
1057
1060
|
for (size_t is = 0; is < result->stacks.size(); is++) {
|
@@ -1059,7 +1062,7 @@ struct llama_grammar * llama_grammar_clone_impl(const struct llama_grammar & gra
|
|
1059
1062
|
for (size_t ir0 = 0; ir0 < grammar.rules.size(); ir0++) {
|
1060
1063
|
for (size_t ir1 = 0; ir1 < grammar.rules[ir0].size(); ir1++) {
|
1061
1064
|
if (grammar.stacks[is][ie] == &grammar.rules[ir0][ir1]) {
|
1062
|
-
|
1065
|
+
result->stacks[is][ie] = &result->rules[ir0][ir1];
|
1063
1066
|
}
|
1064
1067
|
}
|
1065
1068
|
}
|
@@ -1126,11 +1129,8 @@ void llama_grammar_accept_impl(struct llama_grammar & grammar, llama_token token
|
|
1126
1129
|
const auto decoded = decode_utf8(piece, grammar.partial_utf8);
|
1127
1130
|
const auto & code_points = decoded.first;
|
1128
1131
|
|
1129
|
-
llama_grammar_stacks stacks_new;
|
1130
|
-
|
1131
1132
|
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
1132
|
-
llama_grammar_accept(grammar
|
1133
|
-
grammar.stacks = std::move(stacks_new);
|
1133
|
+
llama_grammar_accept(&grammar, *it);
|
1134
1134
|
}
|
1135
1135
|
|
1136
1136
|
grammar.partial_utf8 = decoded.second;
|