llama_cpp 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +39 -6
- data/examples/chat.rb +2 -1
- data/examples/embedding.rb +3 -2
- data/ext/llama_cpp/extconf.rb +13 -0
- data/ext/llama_cpp/llama_cpp.cpp +305 -133
- data/ext/llama_cpp/src/ggml-cuda.cu +367 -69
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.m +36 -30
- data/ext/llama_cpp/src/ggml-metal.metal +328 -84
- data/ext/llama_cpp/src/ggml-opencl.cpp +352 -175
- data/ext/llama_cpp/src/ggml.c +800 -303
- data/ext/llama_cpp/src/ggml.h +68 -5
- data/ext/llama_cpp/src/k_quants.c +1712 -56
- data/ext/llama_cpp/src/k_quants.h +41 -6
- data/ext/llama_cpp/src/llama-util.h +19 -5
- data/ext/llama_cpp/src/llama.cpp +262 -291
- data/ext/llama_cpp/src/llama.h +49 -11
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +0 -2
- data/sig/llama_cpp.rbs +14 -17
- metadata +2 -3
- data/lib/llama_cpp/client.rb +0 -172
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -198,7 +198,7 @@
|
|
198
198
|
#define GGML_MAX_PARAMS 256
|
199
199
|
#define GGML_MAX_CONTEXTS 64
|
200
200
|
#define GGML_MAX_OPT 4
|
201
|
-
#define GGML_MAX_NAME 32
|
201
|
+
#define GGML_MAX_NAME 48
|
202
202
|
#define GGML_DEFAULT_N_THREADS 4
|
203
203
|
|
204
204
|
#define GGML_ASSERT(x) \
|
@@ -345,6 +345,10 @@ extern "C" {
|
|
345
345
|
GGML_OP_MAP_UNARY,
|
346
346
|
GGML_OP_MAP_BINARY,
|
347
347
|
|
348
|
+
GGML_OP_MAP_CUSTOM1,
|
349
|
+
GGML_OP_MAP_CUSTOM2,
|
350
|
+
GGML_OP_MAP_CUSTOM3,
|
351
|
+
|
348
352
|
GGML_OP_CROSS_ENTROPY_LOSS,
|
349
353
|
GGML_OP_CROSS_ENTROPY_LOSS_BACK,
|
350
354
|
|
@@ -465,6 +469,9 @@ extern "C" {
|
|
465
469
|
GGML_API int64_t ggml_cycles(void);
|
466
470
|
GGML_API int64_t ggml_cycles_per_ms(void);
|
467
471
|
|
472
|
+
GGML_API void ggml_numa_init(void); // call once for better performance on NUMA systems
|
473
|
+
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
474
|
+
|
468
475
|
GGML_API void ggml_print_object (const struct ggml_object * obj);
|
469
476
|
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
|
470
477
|
|
@@ -563,6 +570,7 @@ extern "C" {
|
|
563
570
|
|
564
571
|
GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
|
565
572
|
GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
|
573
|
+
GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
|
566
574
|
|
567
575
|
//
|
568
576
|
// operations on tensors with backpropagation
|
@@ -1028,13 +1036,15 @@ extern "C" {
|
|
1028
1036
|
// rotary position embedding
|
1029
1037
|
// if (mode & 1) != 0, skip n_past elements
|
1030
1038
|
// if (mode & 2) != 0, GPT-NeoX style
|
1039
|
+
// if (mode & 4) != 0, ChatGLM style
|
1031
1040
|
// TODO: avoid creating a new tensor every time
|
1032
1041
|
GGML_API struct ggml_tensor * ggml_rope(
|
1033
1042
|
struct ggml_context * ctx,
|
1034
1043
|
struct ggml_tensor * a,
|
1035
1044
|
int n_past,
|
1036
1045
|
int n_dims,
|
1037
|
-
int mode);
|
1046
|
+
int mode,
|
1047
|
+
int n_ctx);
|
1038
1048
|
|
1039
1049
|
// in-place, returns view(a)
|
1040
1050
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
@@ -1042,7 +1052,8 @@ extern "C" {
|
|
1042
1052
|
struct ggml_tensor * a,
|
1043
1053
|
int n_past,
|
1044
1054
|
int n_dims,
|
1045
|
-
int mode);
|
1055
|
+
int mode,
|
1056
|
+
int n_ctx);
|
1046
1057
|
|
1047
1058
|
// rotary position embedding backward, i.e., compute dx from dy
|
1048
1059
|
// a - dy
|
@@ -1166,21 +1177,73 @@ extern "C" {
|
|
1166
1177
|
int h0,
|
1167
1178
|
int w);
|
1168
1179
|
|
1169
|
-
//
|
1170
|
-
|
1180
|
+
// custom operators
|
1181
|
+
|
1182
|
+
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
1171
1183
|
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
1172
1184
|
|
1185
|
+
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
1186
|
+
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1187
|
+
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
1188
|
+
|
1173
1189
|
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
1174
1190
|
struct ggml_context * ctx,
|
1175
1191
|
struct ggml_tensor * a,
|
1176
1192
|
ggml_unary_op_f32_t fun);
|
1177
1193
|
|
1194
|
+
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
1195
|
+
struct ggml_context * ctx,
|
1196
|
+
struct ggml_tensor * a,
|
1197
|
+
ggml_unary_op_f32_t fun);
|
1198
|
+
|
1178
1199
|
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
1179
1200
|
struct ggml_context * ctx,
|
1180
1201
|
struct ggml_tensor * a,
|
1181
1202
|
struct ggml_tensor * b,
|
1182
1203
|
ggml_binary_op_f32_t fun);
|
1183
1204
|
|
1205
|
+
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
1206
|
+
struct ggml_context * ctx,
|
1207
|
+
struct ggml_tensor * a,
|
1208
|
+
struct ggml_tensor * b,
|
1209
|
+
ggml_binary_op_f32_t fun);
|
1210
|
+
|
1211
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
1212
|
+
struct ggml_context * ctx,
|
1213
|
+
struct ggml_tensor * a,
|
1214
|
+
ggml_custom1_op_f32_t fun);
|
1215
|
+
|
1216
|
+
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
1217
|
+
struct ggml_context * ctx,
|
1218
|
+
struct ggml_tensor * a,
|
1219
|
+
ggml_custom1_op_f32_t fun);
|
1220
|
+
|
1221
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
1222
|
+
struct ggml_context * ctx,
|
1223
|
+
struct ggml_tensor * a,
|
1224
|
+
struct ggml_tensor * b,
|
1225
|
+
ggml_custom2_op_f32_t fun);
|
1226
|
+
|
1227
|
+
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
1228
|
+
struct ggml_context * ctx,
|
1229
|
+
struct ggml_tensor * a,
|
1230
|
+
struct ggml_tensor * b,
|
1231
|
+
ggml_custom2_op_f32_t fun);
|
1232
|
+
|
1233
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
1234
|
+
struct ggml_context * ctx,
|
1235
|
+
struct ggml_tensor * a,
|
1236
|
+
struct ggml_tensor * b,
|
1237
|
+
struct ggml_tensor * c,
|
1238
|
+
ggml_custom3_op_f32_t fun);
|
1239
|
+
|
1240
|
+
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
1241
|
+
struct ggml_context * ctx,
|
1242
|
+
struct ggml_tensor * a,
|
1243
|
+
struct ggml_tensor * b,
|
1244
|
+
struct ggml_tensor * c,
|
1245
|
+
ggml_custom3_op_f32_t fun);
|
1246
|
+
|
1184
1247
|
// loss function
|
1185
1248
|
|
1186
1249
|
GGML_API struct ggml_tensor * ggml_cross_entropy_loss(
|