llama_cpp 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +547 -272
- data/ext/llama_cpp/src/ggml-alloc.c +14 -8
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +307 -127
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +200 -94
- data/ext/llama_cpp/src/ggml-metal.metal +264 -82
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +1647 -865
- data/ext/llama_cpp/src/ggml.h +143 -52
- data/ext/llama_cpp/src/llama.cpp +1427 -635
- data/ext/llama_cpp/src/llama.h +308 -119
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +5 -9
- data/sig/llama_cpp.rbs +65 -34
- metadata +3 -3
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -195,6 +195,14 @@
|
|
195
195
|
# define GGML_DEPRECATED(func, hint) func
|
196
196
|
#endif
|
197
197
|
|
198
|
+
#ifndef __GNUC__
|
199
|
+
# define GGML_ATTRIBUTE_FORMAT(...)
|
200
|
+
#elif defined(__MINGW32__)
|
201
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
202
|
+
#else
|
203
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
204
|
+
#endif
|
205
|
+
|
198
206
|
#include <stdint.h>
|
199
207
|
#include <stddef.h>
|
200
208
|
#include <stdbool.h>
|
@@ -206,8 +214,8 @@
|
|
206
214
|
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
207
215
|
|
208
216
|
#define GGML_MAX_DIMS 4
|
209
|
-
#define GGML_MAX_NODES 4096
|
210
|
-
#define GGML_MAX_PARAMS 256
|
217
|
+
#define GGML_MAX_NODES 16384
|
218
|
+
#define GGML_MAX_PARAMS 1024
|
211
219
|
#define GGML_MAX_CONTEXTS 64
|
212
220
|
#define GGML_MAX_SRC 6
|
213
221
|
#define GGML_MAX_NAME 64
|
@@ -240,6 +248,14 @@
|
|
240
248
|
} \
|
241
249
|
} while (0)
|
242
250
|
|
251
|
+
#ifndef NDEBUG
|
252
|
+
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
|
253
|
+
#elif defined(__GNUC__)
|
254
|
+
#define GGML_UNREACHABLE() __builtin_unreachable()
|
255
|
+
#else
|
256
|
+
#define GGML_UNREACHABLE() ((void) 0)
|
257
|
+
#endif
|
258
|
+
|
243
259
|
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
244
260
|
// main purpose is to reduce code duplication and improve readability.
|
245
261
|
//
|
@@ -270,7 +286,7 @@ extern "C" {
|
|
270
286
|
|
271
287
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
272
288
|
typedef half ggml_fp16_t;
|
273
|
-
#elif defined(__ARM_NEON)
|
289
|
+
#elif defined(__ARM_NEON)
|
274
290
|
typedef __fp16 ggml_fp16_t;
|
275
291
|
#else
|
276
292
|
typedef uint16_t ggml_fp16_t;
|
@@ -437,6 +453,12 @@ extern "C" {
|
|
437
453
|
GGML_OBJECT_WORK_BUFFER
|
438
454
|
};
|
439
455
|
|
456
|
+
enum ggml_log_level {
|
457
|
+
GGML_LOG_LEVEL_ERROR = 2,
|
458
|
+
GGML_LOG_LEVEL_WARN = 3,
|
459
|
+
GGML_LOG_LEVEL_INFO = 4
|
460
|
+
};
|
461
|
+
|
440
462
|
// ggml object
|
441
463
|
struct ggml_object {
|
442
464
|
size_t offs;
|
@@ -459,8 +481,8 @@ extern "C" {
|
|
459
481
|
int n_dims;
|
460
482
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
461
483
|
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
462
|
-
// nb[0] = sizeof(type)
|
463
|
-
// nb[1] = nb[0] * ne[0] + padding
|
484
|
+
// nb[0] = ggml_type_size(type)
|
485
|
+
// nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding
|
464
486
|
// nb[i] = nb[i-1] * ne[i-1]
|
465
487
|
|
466
488
|
// compute data
|
@@ -512,7 +534,15 @@ extern "C" {
|
|
512
534
|
// next prime after GGML_MAX_NODES
|
513
535
|
// #define GGML_GRAPH_HASHTABLE_SIZE 4099
|
514
536
|
// next prime after GGML_MAX_NODES * 2 (nodes + leafs)
|
515
|
-
#define GGML_GRAPH_HASHTABLE_SIZE 8273
|
537
|
+
// #define GGML_GRAPH_HASHTABLE_SIZE 8273
|
538
|
+
// #define GGML_GRAPH_HASHTABLE_SIZE 16411
|
539
|
+
#define GGML_GRAPH_HASHTABLE_SIZE 32771
|
540
|
+
|
541
|
+
enum ggml_cgraph_eval_order {
|
542
|
+
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
543
|
+
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
544
|
+
GGML_CGRAPH_EVAL_ORDER_COUNT
|
545
|
+
};
|
516
546
|
|
517
547
|
// computation graph
|
518
548
|
struct ggml_cgraph {
|
@@ -525,6 +555,8 @@ extern "C" {
|
|
525
555
|
|
526
556
|
void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
|
527
557
|
|
558
|
+
enum ggml_cgraph_eval_order order;
|
559
|
+
|
528
560
|
// performance
|
529
561
|
int perf_runs;
|
530
562
|
int64_t perf_cycles;
|
@@ -672,12 +704,21 @@ extern "C" {
|
|
672
704
|
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
673
705
|
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
674
706
|
|
707
|
+
// Converts a flat index into coordinates
|
708
|
+
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
709
|
+
|
675
710
|
GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
676
711
|
GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
677
712
|
|
713
|
+
GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
714
|
+
GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
715
|
+
|
678
716
|
GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
679
717
|
GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
680
718
|
|
719
|
+
GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
720
|
+
GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
721
|
+
|
681
722
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
682
723
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
683
724
|
|
@@ -685,6 +726,7 @@ extern "C" {
|
|
685
726
|
|
686
727
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
687
728
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
729
|
+
GGML_ATTRIBUTE_FORMAT(2, 3)
|
688
730
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
689
731
|
|
690
732
|
//
|
@@ -710,6 +752,12 @@ extern "C" {
|
|
710
752
|
struct ggml_tensor * a,
|
711
753
|
struct ggml_tensor * b);
|
712
754
|
|
755
|
+
GGML_API struct ggml_tensor * ggml_add_cast(
|
756
|
+
struct ggml_context * ctx,
|
757
|
+
struct ggml_tensor * a,
|
758
|
+
struct ggml_tensor * b,
|
759
|
+
enum ggml_type type);
|
760
|
+
|
713
761
|
GGML_API struct ggml_tensor * ggml_add1(
|
714
762
|
struct ggml_context * ctx,
|
715
763
|
struct ggml_tensor * a,
|
@@ -819,6 +867,7 @@ extern "C" {
|
|
819
867
|
struct ggml_tensor * a,
|
820
868
|
struct ggml_tensor * b);
|
821
869
|
|
870
|
+
// sums repetitions in a into shape of b
|
822
871
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
823
872
|
struct ggml_context * ctx,
|
824
873
|
struct ggml_tensor * a,
|
@@ -1040,7 +1089,6 @@ extern "C" {
|
|
1040
1089
|
size_t nb1,
|
1041
1090
|
size_t offset);
|
1042
1091
|
|
1043
|
-
|
1044
1092
|
// a -> b, return view(b)
|
1045
1093
|
GGML_API struct ggml_tensor * ggml_cpy(
|
1046
1094
|
struct ggml_context * ctx,
|
@@ -1063,6 +1111,33 @@ extern "C" {
|
|
1063
1111
|
struct ggml_context * ctx,
|
1064
1112
|
struct ggml_tensor * a);
|
1065
1113
|
|
1114
|
+
// make contiguous, with new shape
|
1115
|
+
GGML_API struct ggml_tensor * ggml_cont_1d(
|
1116
|
+
struct ggml_context * ctx,
|
1117
|
+
struct ggml_tensor * a,
|
1118
|
+
int64_t ne0);
|
1119
|
+
|
1120
|
+
GGML_API struct ggml_tensor * ggml_cont_2d(
|
1121
|
+
struct ggml_context * ctx,
|
1122
|
+
struct ggml_tensor * a,
|
1123
|
+
int64_t ne0,
|
1124
|
+
int64_t ne1);
|
1125
|
+
|
1126
|
+
GGML_API struct ggml_tensor * ggml_cont_3d(
|
1127
|
+
struct ggml_context * ctx,
|
1128
|
+
struct ggml_tensor * a,
|
1129
|
+
int64_t ne0,
|
1130
|
+
int64_t ne1,
|
1131
|
+
int64_t ne2);
|
1132
|
+
|
1133
|
+
GGML_API struct ggml_tensor * ggml_cont_4d(
|
1134
|
+
struct ggml_context * ctx,
|
1135
|
+
struct ggml_tensor * a,
|
1136
|
+
int64_t ne0,
|
1137
|
+
int64_t ne1,
|
1138
|
+
int64_t ne2,
|
1139
|
+
int64_t ne3);
|
1140
|
+
|
1066
1141
|
// return view(a), b specifies the new shape
|
1067
1142
|
// TODO: when we start computing gradient, make a copy instead of view
|
1068
1143
|
GGML_API struct ggml_tensor * ggml_reshape(
|
@@ -1210,14 +1285,15 @@ extern "C" {
|
|
1210
1285
|
struct ggml_tensor * b);
|
1211
1286
|
|
1212
1287
|
// rotary position embedding
|
1213
|
-
// if mode & 1 == 1, skip n_past elements
|
1288
|
+
// if mode & 1 == 1, skip n_past elements (DEPRECATED)
|
1214
1289
|
// if mode & 2 == 1, GPT-NeoX style
|
1215
1290
|
// if mode & 4 == 1, ChatGLM style
|
1216
|
-
//
|
1291
|
+
//
|
1292
|
+
// b is an int32 vector with size a->ne[2], it contains the positions
|
1217
1293
|
GGML_API struct ggml_tensor * ggml_rope(
|
1218
1294
|
struct ggml_context * ctx,
|
1219
1295
|
struct ggml_tensor * a,
|
1220
|
-
int n_past,
1296
|
+
struct ggml_tensor * b,
|
1221
1297
|
int n_dims,
|
1222
1298
|
int mode,
|
1223
1299
|
int n_ctx);
|
@@ -1226,7 +1302,7 @@ extern "C" {
|
|
1226
1302
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
1227
1303
|
struct ggml_context * ctx,
|
1228
1304
|
struct ggml_tensor * a,
|
1229
|
-
int n_past,
1305
|
+
struct ggml_tensor * b,
|
1230
1306
|
int n_dims,
|
1231
1307
|
int mode,
|
1232
1308
|
int n_ctx);
|
@@ -1235,7 +1311,7 @@ extern "C" {
|
|
1235
1311
|
GGML_API struct ggml_tensor * ggml_rope_custom(
|
1236
1312
|
struct ggml_context * ctx,
|
1237
1313
|
struct ggml_tensor * a,
|
1238
|
-
int n_past,
1314
|
+
struct ggml_tensor * b,
|
1239
1315
|
int n_dims,
|
1240
1316
|
int mode,
|
1241
1317
|
int n_ctx,
|
@@ -1246,7 +1322,7 @@ extern "C" {
|
|
1246
1322
|
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
1247
1323
|
struct ggml_context * ctx,
|
1248
1324
|
struct ggml_tensor * a,
|
1249
|
-
int n_past,
1325
|
+
struct ggml_tensor * b,
|
1250
1326
|
int n_dims,
|
1251
1327
|
int mode,
|
1252
1328
|
int n_ctx,
|
@@ -1257,7 +1333,7 @@ extern "C" {
|
|
1257
1333
|
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
1258
1334
|
struct ggml_context * ctx,
|
1259
1335
|
struct ggml_tensor * a,
|
1260
|
-
int n_past,
1336
|
+
struct ggml_tensor * b,
|
1261
1337
|
int n_dims,
|
1262
1338
|
float base,
|
1263
1339
|
bool down);
|
@@ -1267,7 +1343,7 @@ extern "C" {
|
|
1267
1343
|
GGML_API struct ggml_tensor * ggml_rope_back(
|
1268
1344
|
struct ggml_context * ctx,
|
1269
1345
|
struct ggml_tensor * a,
|
1270
|
-
int n_past,
1346
|
+
struct ggml_tensor * b,
|
1271
1347
|
int n_dims,
|
1272
1348
|
int mode,
|
1273
1349
|
int n_ctx,
|
@@ -1647,6 +1723,16 @@ extern "C" {
|
|
1647
1723
|
// dump the graph into a file using the dot format
|
1648
1724
|
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
|
1649
1725
|
|
1726
|
+
// build gradient checkpointing backward graph gb for gf using provided checkpoints
|
1727
|
+
// gb_tmp will contain original backward graph with rewritten backward process nodes,
|
1728
|
+
// but without the second forward pass nodes.
|
1729
|
+
GGML_API void ggml_build_backward_gradient_checkpointing(
|
1730
|
+
struct ggml_context * ctx,
|
1731
|
+
struct ggml_cgraph * gf,
|
1732
|
+
struct ggml_cgraph * gb,
|
1733
|
+
struct ggml_cgraph * gb_tmp,
|
1734
|
+
struct ggml_tensor * * checkpoints,
|
1735
|
+
int n_checkpoints);
|
1650
1736
|
//
|
1651
1737
|
// optimization
|
1652
1738
|
//
|
@@ -1681,7 +1767,8 @@ extern "C" {
|
|
1681
1767
|
GGML_LINESEARCH_INVALID_PARAMETERS,
|
1682
1768
|
};
|
1683
1769
|
|
1684
|
-
typedef void (*ggml_opt_callback)(void * data, float * sched);
|
1770
|
+
typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
|
1771
|
+
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
|
1685
1772
|
|
1686
1773
|
// optimization parameters
|
1687
1774
|
//
|
@@ -1712,6 +1799,8 @@ extern "C" {
|
|
1712
1799
|
bool print_forward_graph;
|
1713
1800
|
bool print_backward_graph;
|
1714
1801
|
|
1802
|
+
int n_gradient_accumulation;
|
1803
|
+
|
1715
1804
|
// ADAM parameters
|
1716
1805
|
struct {
|
1717
1806
|
int n_iter;
|
@@ -1757,6 +1846,7 @@ extern "C" {
|
|
1757
1846
|
float loss_after;
|
1758
1847
|
|
1759
1848
|
struct {
|
1849
|
+
struct ggml_tensor * g; // current gradient
|
1760
1850
|
struct ggml_tensor * m; // first moment
|
1761
1851
|
struct ggml_tensor * v; // second moment
|
1762
1852
|
struct ggml_tensor * pf; // past function values
|
@@ -1866,39 +1956,39 @@ extern "C" {
|
|
1866
1956
|
|
1867
1957
|
GGML_API const char * gguf_type_name(enum gguf_type type);
|
1868
1958
|
|
1869
|
-
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
1870
|
-
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
1871
|
-
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
1872
|
-
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
1873
|
-
|
1874
|
-
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
1875
|
-
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
1876
|
-
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
1877
|
-
|
1878
|
-
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
1879
|
-
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
1880
|
-
|
1881
|
-
// results are undefined if the wrong type is used for the key
|
1882
|
-
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
1883
|
-
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
1884
|
-
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
1885
|
-
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
1886
|
-
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
1887
|
-
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
1888
|
-
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
1889
|
-
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
1890
|
-
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
1891
|
-
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
1892
|
-
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
1893
|
-
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
1894
|
-
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
1895
|
-
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
1896
|
-
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
1897
|
-
|
1898
|
-
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
1899
|
-
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
1900
|
-
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
1901
|
-
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
1959
|
+
GGML_API int gguf_get_version (const struct gguf_context * ctx);
|
1960
|
+
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
1961
|
+
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
1962
|
+
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
1963
|
+
|
1964
|
+
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
1965
|
+
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
1966
|
+
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
|
1967
|
+
|
1968
|
+
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
|
1969
|
+
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
|
1970
|
+
|
1971
|
+
// will abort if the wrong type is used for the key
|
1972
|
+
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
|
1973
|
+
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
|
1974
|
+
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
|
1975
|
+
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
|
1976
|
+
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
|
1977
|
+
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
|
1978
|
+
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
|
1979
|
+
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
|
1980
|
+
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
|
1981
|
+
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
|
1982
|
+
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
|
1983
|
+
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
|
1984
|
+
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
|
1985
|
+
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
1986
|
+
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
1987
|
+
|
1988
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
1989
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
1990
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
1991
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
1902
1992
|
|
1903
1993
|
// overrides existing values or adds a new one
|
1904
1994
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
@@ -1943,11 +2033,11 @@ extern "C" {
|
|
1943
2033
|
//
|
1944
2034
|
|
1945
2035
|
// write the entire context to a binary file
|
1946
|
-
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
2036
|
+
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
1947
2037
|
|
1948
2038
|
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
1949
|
-
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
1950
|
-
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
2039
|
+
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
2040
|
+
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
1951
2041
|
|
1952
2042
|
//
|
1953
2043
|
// system info
|
@@ -1961,6 +2051,7 @@ extern "C" {
|
|
1961
2051
|
GGML_API int ggml_cpu_has_fma (void);
|
1962
2052
|
GGML_API int ggml_cpu_has_neon (void);
|
1963
2053
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
2054
|
+
GGML_API int ggml_cpu_has_metal (void);
|
1964
2055
|
GGML_API int ggml_cpu_has_f16c (void);
|
1965
2056
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
1966
2057
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|