llama_cpp 0.10.1 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/ext/llama_cpp/llama_cpp.cpp +16 -1
- data/ext/llama_cpp/src/ggml-alloc.c +12 -4
- data/ext/llama_cpp/src/ggml-backend-impl.h +12 -8
- data/ext/llama_cpp/src/ggml-backend.c +75 -5
- data/ext/llama_cpp/src/ggml-backend.h +7 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +284 -162
- data/ext/llama_cpp/src/ggml-metal.h +3 -0
- data/ext/llama_cpp/src/ggml-metal.m +190 -44
- data/ext/llama_cpp/src/ggml-metal.metal +11 -2
- data/ext/llama_cpp/src/ggml.c +262 -89
- data/ext/llama_cpp/src/ggml.h +24 -10
- data/ext/llama_cpp/src/llama.cpp +926 -780
- data/ext/llama_cpp/src/llama.h +8 -3
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -303,7 +303,7 @@ extern "C" {
|
|
303
303
|
|
304
304
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
305
305
|
typedef half ggml_fp16_t;
|
306
|
-
#elif defined(__ARM_NEON)
|
306
|
+
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
|
307
307
|
typedef __fp16 ggml_fp16_t;
|
308
308
|
#else
|
309
309
|
typedef uint16_t ggml_fp16_t;
|
@@ -343,6 +343,12 @@ extern "C" {
|
|
343
343
|
GGML_TYPE_COUNT,
|
344
344
|
};
|
345
345
|
|
346
|
+
// precision
|
347
|
+
enum ggml_prec {
|
348
|
+
GGML_PREC_DEFAULT,
|
349
|
+
GGML_PREC_F32,
|
350
|
+
};
|
351
|
+
|
346
352
|
enum ggml_backend_type {
|
347
353
|
GGML_BACKEND_CPU = 0,
|
348
354
|
GGML_BACKEND_GPU = 10,
|
@@ -478,7 +484,8 @@ extern "C" {
|
|
478
484
|
enum ggml_log_level {
|
479
485
|
GGML_LOG_LEVEL_ERROR = 2,
|
480
486
|
GGML_LOG_LEVEL_WARN = 3,
|
481
|
-
GGML_LOG_LEVEL_INFO = 4
|
487
|
+
GGML_LOG_LEVEL_INFO = 4,
|
488
|
+
GGML_LOG_LEVEL_DEBUG = 5
|
482
489
|
};
|
483
490
|
|
484
491
|
// ggml object
|
@@ -729,8 +736,8 @@ extern "C" {
|
|
729
736
|
GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
|
730
737
|
|
731
738
|
// Context tensor enumeration and lookup
|
732
|
-
GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
|
733
|
-
GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
|
739
|
+
GGML_API struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx);
|
740
|
+
GGML_API struct ggml_tensor * ggml_get_next_tensor (const struct ggml_context * ctx, struct ggml_tensor * tensor);
|
734
741
|
GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
|
735
742
|
|
736
743
|
GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
|
@@ -1057,6 +1064,12 @@ extern "C" {
|
|
1057
1064
|
struct ggml_tensor * a,
|
1058
1065
|
struct ggml_tensor * b);
|
1059
1066
|
|
1067
|
+
// change the precision of a matrix multiplication
|
1068
|
+
// set to GGML_PREC_F32 for higher precision (useful for phi-2)
|
1069
|
+
GGML_API void ggml_mul_mat_set_prec(
|
1070
|
+
struct ggml_tensor * a,
|
1071
|
+
enum ggml_prec prec);
|
1072
|
+
|
1060
1073
|
// indirect matrix multiplication
|
1061
1074
|
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1062
1075
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
@@ -1082,13 +1095,13 @@ extern "C" {
|
|
1082
1095
|
GGML_API struct ggml_tensor * ggml_scale(
|
1083
1096
|
struct ggml_context * ctx,
|
1084
1097
|
struct ggml_tensor * a,
|
1085
|
-
struct ggml_tensor * b);
|
1098
|
+
float s);
|
1086
1099
|
|
1087
1100
|
// in-place, returns view(a)
|
1088
1101
|
GGML_API struct ggml_tensor * ggml_scale_inplace(
|
1089
1102
|
struct ggml_context * ctx,
|
1090
1103
|
struct ggml_tensor * a,
|
1091
|
-
struct ggml_tensor * b);
|
1104
|
+
float s);
|
1092
1105
|
|
1093
1106
|
// b -> view(a,offset,nb1,nb2,3), return modified a
|
1094
1107
|
GGML_API struct ggml_tensor * ggml_set(
|
@@ -2123,10 +2136,11 @@ extern "C" {
|
|
2123
2136
|
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
2124
2137
|
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
2125
2138
|
|
2126
|
-
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2127
|
-
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2128
|
-
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2129
|
-
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2139
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
2140
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
2141
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
2142
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
2143
|
+
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
|
2130
2144
|
|
2131
2145
|
// overrides existing values or adds a new one
|
2132
2146
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|