llama_cpp 0.10.1 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +16 -1
- data/ext/llama_cpp/src/ggml-alloc.c +12 -4
- data/ext/llama_cpp/src/ggml-backend-impl.h +12 -8
- data/ext/llama_cpp/src/ggml-backend.c +75 -5
- data/ext/llama_cpp/src/ggml-backend.h +7 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +284 -162
- data/ext/llama_cpp/src/ggml-metal.h +3 -0
- data/ext/llama_cpp/src/ggml-metal.m +190 -44
- data/ext/llama_cpp/src/ggml-metal.metal +11 -2
- data/ext/llama_cpp/src/ggml.c +262 -89
- data/ext/llama_cpp/src/ggml.h +24 -10
- data/ext/llama_cpp/src/llama.cpp +926 -780
- data/ext/llama_cpp/src/llama.h +8 -3
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -303,7 +303,7 @@ extern "C" {
|
|
303
303
|
|
304
304
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
305
305
|
typedef half ggml_fp16_t;
|
306
|
-
#elif defined(__ARM_NEON)
|
306
|
+
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
307
307
|
typedef __fp16 ggml_fp16_t;
|
308
308
|
#else
|
309
309
|
typedef uint16_t ggml_fp16_t;
|
@@ -343,6 +343,12 @@ extern "C" {
|
|
343
343
|
GGML_TYPE_COUNT,
|
344
344
|
};
|
345
345
|
|
346
|
+
// precision
|
347
|
+
enum ggml_prec {
|
348
|
+
GGML_PREC_DEFAULT,
|
349
|
+
GGML_PREC_F32,
|
350
|
+
};
|
351
|
+
|
346
352
|
enum ggml_backend_type {
|
347
353
|
GGML_BACKEND_CPU = 0,
|
348
354
|
GGML_BACKEND_GPU = 10,
|
@@ -478,7 +484,8 @@ extern "C" {
|
|
478
484
|
enum ggml_log_level {
|
479
485
|
GGML_LOG_LEVEL_ERROR = 2,
|
480
486
|
GGML_LOG_LEVEL_WARN = 3,
|
481
|
-
GGML_LOG_LEVEL_INFO = 4
|
487
|
+
GGML_LOG_LEVEL_INFO = 4,
|
488
|
+
GGML_LOG_LEVEL_DEBUG = 5
|
482
489
|
};
|
483
490
|
|
484
491
|
// ggml object
|
@@ -729,8 +736,8 @@ extern "C" {
|
|
729
736
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
730
737
|
|
731
738
|
// Context tensor enumeration and lookup
|
732
|
-
GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
|
733
|
-
GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
|
739
|
+
GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
|
740
|
+
GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
|
734
741
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
735
742
|
|
736
743
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
@@ -1057,6 +1064,12 @@ extern "C" {
|
|
1057
1064
|
struct ggml_tensor * a,
|
1058
1065
|
struct ggml_tensor * b);
|
1059
1066
|
|
1067
|
+
// change the precision of a matrix multiplication
|
1068
|
+
// set to GGML_PREC_F32 for higher precision (useful for phi-2)
|
1069
|
+
GGML_API void ggml_mul_mat_set_prec(
|
1070
|
+
struct ggml_tensor * a,
|
1071
|
+
enum ggml_prec prec);
|
1072
|
+
|
1060
1073
|
// indirect matrix multiplication
|
1061
1074
|
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1062
1075
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
@@ -1082,13 +1095,13 @@ extern "C" {
|
|
1082
1095
|
GGML_API struct ggml_tensor * ggml_scale(
|
1083
1096
|
struct ggml_context * ctx,
|
1084
1097
|
struct ggml_tensor * a,
|
1085
|
-
struct ggml_tensor * b);
|
1098
|
+
float s);
|
1086
1099
|
|
1087
1100
|
// in-place, returns view(a)
|
1088
1101
|
GGML_API struct ggml_tensor * ggml_scale_inplace(
|
1089
1102
|
struct ggml_context * ctx,
|
1090
1103
|
struct ggml_tensor * a,
|
1091
|
-
struct ggml_tensor * b);
|
1104
|
+
float s);
|
1092
1105
|
|
1093
1106
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1094
1107
|
GGML_API struct ggml_tensor * ggml_set(
|
@@ -2123,10 +2136,11 @@ extern "C" {
|
|
2123
2136
|
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
2124
2137
|
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
2125
2138
|
|
2126
|
-
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2127
|
-
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2128
|
-
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2129
|
-
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2139
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2140
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2141
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2142
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2143
|
+
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
2130
2144
|
|
2131
2145
|
// overrides existing values or adds a new one
|
2132
2146
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|