llama_cpp 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/README.md +39 -6
- data/examples/chat.rb +2 -1
- data/examples/embedding.rb +3 -2
- data/ext/llama_cpp/extconf.rb +13 -0
- data/ext/llama_cpp/llama_cpp.cpp +231 -132
- data/ext/llama_cpp/src/ggml-cuda.cu +319 -52
- data/ext/llama_cpp/src/ggml-metal.m +36 -30
- data/ext/llama_cpp/src/ggml-metal.metal +328 -84
- data/ext/llama_cpp/src/ggml.c +800 -303
- data/ext/llama_cpp/src/ggml.h +68 -5
- data/ext/llama_cpp/src/k_quants.c +1712 -56
- data/ext/llama_cpp/src/k_quants.h +41 -6
- data/ext/llama_cpp/src/llama-util.h +19 -5
- data/ext/llama_cpp/src/llama.cpp +138 -72
- data/ext/llama_cpp/src/llama.h +33 -5
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +0 -2
- data/sig/llama_cpp.rbs +12 -17
- metadata +2 -3
- data/lib/llama_cpp/client.rb +0 -172
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -198,7 +198,7 @@
|
|
198
198
|
#define GGML_MAX_PARAMS 256
|
199
199
|
#define GGML_MAX_CONTEXTS 64
|
200
200
|
#define GGML_MAX_OPT 4
|
201
|
-
#define GGML_MAX_NAME 32
|
201
|
+
#define GGML_MAX_NAME 48
|
202
202
|
#define GGML_DEFAULT_N_THREADS 4
|
203
203
|
|
204
204
|
#define GGML_ASSERT(x) \
|
@@ -345,6 +345,10 @@ extern "C" {
|
|
345
345
|
GGML_OP_MAP_UNARY,
|
346
346
|
GGML_OP_MAP_BINARY,
|
347
347
|
|
348
|
+
GGML_OP_MAP_CUSTOM1,
|
349
|
+
GGML_OP_MAP_CUSTOM2,
|
350
|
+
GGML_OP_MAP_CUSTOM3,
|
351
|
+
|
348
352
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
349
353
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
350
354
|
|
@@ -465,6 +469,9 @@ extern "C" {
|
|
465
469
|
GGML_API int64_t ggml_cycles(void);
|
466
470
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
467
471
|
|
472
|
+
GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
|
473
|
+
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
474
|
+
|
468
475
|
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
469
476
|
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
470
477
|
|
@@ -563,6 +570,7 @@ extern "C" {
|
|
563
570
|
|
564
571
|
GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
|
565
572
|
GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
|
573
|
+
GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
|
566
574
|
|
567
575
|
//
|
568
576
|
// operations on tensors with backpropagation
|
@@ -1028,13 +1036,15 @@ extern "C" {
|
|
1028
1036
|
// rotary position embedding
|
1029
1037
|
// if mode & 1 == 1, skip n_past elements
|
1030
1038
|
// if mode & 2 == 1, GPT-NeoX style
|
1039
|
+
// if mode & 4 == 1, ChatGLM style
|
1031
1040
|
// TODO: avoid creating a new tensor every time
|
1032
1041
|
GGML_API struct ggml_tensor * ggml_rope(
|
1033
1042
|
struct ggml_context * ctx,
|
1034
1043
|
struct ggml_tensor * a,
|
1035
1044
|
int n_past,
|
1036
1045
|
int n_dims,
|
1037
|
-
int mode);
|
1046
|
+
int mode,
|
1047
|
+
int n_ctx);
|
1038
1048
|
|
1039
1049
|
// in-place, returns view(a)
|
1040
1050
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
@@ -1042,7 +1052,8 @@ extern "C" {
|
|
1042
1052
|
struct ggml_tensor * a,
|
1043
1053
|
int n_past,
|
1044
1054
|
int n_dims,
|
1045
|
-
int mode);
|
1055
|
+
int mode,
|
1056
|
+
int n_ctx);
|
1046
1057
|
|
1047
1058
|
// rotary position embedding backward, i.e compute dx from dy
|
1048
1059
|
// a - dy
|
@@ -1166,21 +1177,73 @@ extern "C" {
|
|
1166
1177
|
int h0,
|
1167
1178
|
int w);
|
1168
1179
|
|
1169
|
-
// Mapping operations
|
1170
|
-
typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
|
1180
|
+
// custom operators
|
1181
|
+
|
1182
|
+
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
1171
1183
|
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1172
1184
|
|
1185
|
+
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1186
|
+
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1187
|
+
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1188
|
+
|
1173
1189
|
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
1174
1190
|
struct ggml_context * ctx,
|
1175
1191
|
struct ggml_tensor * a,
|
1176
1192
|
ggml_unary_op_f32_t fun);
|
1177
1193
|
|
1194
|
+
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1195
|
+
struct ggml_context * ctx,
|
1196
|
+
struct ggml_tensor * a,
|
1197
|
+
ggml_unary_op_f32_t fun);
|
1198
|
+
|
1178
1199
|
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1179
1200
|
struct ggml_context * ctx,
|
1180
1201
|
struct ggml_tensor * a,
|
1181
1202
|
struct ggml_tensor * b,
|
1182
1203
|
ggml_binary_op_f32_t fun);
|
1183
1204
|
|
1205
|
+
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1206
|
+
struct ggml_context * ctx,
|
1207
|
+
struct ggml_tensor * a,
|
1208
|
+
struct ggml_tensor * b,
|
1209
|
+
ggml_binary_op_f32_t fun);
|
1210
|
+
|
1211
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1212
|
+
struct ggml_context * ctx,
|
1213
|
+
struct ggml_tensor * a,
|
1214
|
+
ggml_custom1_op_f32_t fun);
|
1215
|
+
|
1216
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1217
|
+
struct ggml_context * ctx,
|
1218
|
+
struct ggml_tensor * a,
|
1219
|
+
ggml_custom1_op_f32_t fun);
|
1220
|
+
|
1221
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1222
|
+
struct ggml_context * ctx,
|
1223
|
+
struct ggml_tensor * a,
|
1224
|
+
struct ggml_tensor * b,
|
1225
|
+
ggml_custom2_op_f32_t fun);
|
1226
|
+
|
1227
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1228
|
+
struct ggml_context * ctx,
|
1229
|
+
struct ggml_tensor * a,
|
1230
|
+
struct ggml_tensor * b,
|
1231
|
+
ggml_custom2_op_f32_t fun);
|
1232
|
+
|
1233
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1234
|
+
struct ggml_context * ctx,
|
1235
|
+
struct ggml_tensor * a,
|
1236
|
+
struct ggml_tensor * b,
|
1237
|
+
struct ggml_tensor * c,
|
1238
|
+
ggml_custom3_op_f32_t fun);
|
1239
|
+
|
1240
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1241
|
+
struct ggml_context * ctx,
|
1242
|
+
struct ggml_tensor * a,
|
1243
|
+
struct ggml_tensor * b,
|
1244
|
+
struct ggml_tensor * c,
|
1245
|
+
ggml_custom3_op_f32_t fun);
|
1246
|
+
|
1184
1247
|
// loss function
|
1185
1248
|
|
1186
1249
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|