llama_cpp 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -5
- data/examples/chat.rb +13 -13
- data/examples/embedding.rb +9 -9
- data/ext/llama_cpp/llama_cpp.cpp +547 -272
- data/ext/llama_cpp/src/ggml-alloc.c +14 -8
- data/ext/llama_cpp/src/ggml-alloc.h +1 -0
- data/ext/llama_cpp/src/ggml-cuda.cu +307 -127
- data/ext/llama_cpp/src/ggml-cuda.h +1 -0
- data/ext/llama_cpp/src/ggml-metal.h +4 -0
- data/ext/llama_cpp/src/ggml-metal.m +200 -94
- data/ext/llama_cpp/src/ggml-metal.metal +264 -82
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +1647 -865
- data/ext/llama_cpp/src/ggml.h +143 -52
- data/ext/llama_cpp/src/llama.cpp +1427 -635
- data/ext/llama_cpp/src/llama.h +308 -119
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +5 -9
- data/sig/llama_cpp.rbs +65 -34
- metadata +3 -3
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -195,6 +195,14 @@
|
|
195
195
|
# define GGML_DEPRECATED(func, hint) func
|
196
196
|
#endif
|
197
197
|
|
198
|
+
#ifndef __GNUC__
|
199
|
+
# define GGML_ATTRIBUTE_FORMAT(...)
|
200
|
+
#elif defined(__MINGW32__)
|
201
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
202
|
+
#else
|
203
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
204
|
+
#endif
|
205
|
+
|
198
206
|
#include <stdint.h>
|
199
207
|
#include <stddef.h>
|
200
208
|
#include <stdbool.h>
|
@@ -206,8 +214,8 @@
|
|
206
214
|
#define GGML_QNT_VERSION_FACTOR 1000 // do not change this
|
207
215
|
|
208
216
|
#define GGML_MAX_DIMS 4
|
209
|
-
#define GGML_MAX_NODES 4096
|
210
|
-
#define GGML_MAX_PARAMS 256
|
217
|
+
#define GGML_MAX_NODES 16384
|
218
|
+
#define GGML_MAX_PARAMS 1024
|
211
219
|
#define GGML_MAX_CONTEXTS 64
|
212
220
|
#define GGML_MAX_SRC 6
|
213
221
|
#define GGML_MAX_NAME 64
|
@@ -240,6 +248,14 @@
|
|
240
248
|
} \
|
241
249
|
} while (0)
|
242
250
|
|
251
|
+
#ifndef NDEBUG
|
252
|
+
#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
|
253
|
+
#elif defined(__GNUC__)
|
254
|
+
#define GGML_UNREACHABLE() __builtin_unreachable()
|
255
|
+
#else
|
256
|
+
#define GGML_UNREACHABLE() ((void) 0)
|
257
|
+
#endif
|
258
|
+
|
243
259
|
// used to copy the number of elements and stride in bytes of tensors into local variables.
|
244
260
|
// main purpose is to reduce code duplication and improve readability.
|
245
261
|
//
|
@@ -270,7 +286,7 @@ extern "C" {
|
|
270
286
|
|
271
287
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
272
288
|
typedef half ggml_fp16_t;
|
273
|
-
#elif defined(__ARM_NEON)
|
289
|
+
#elif defined(__ARM_NEON)
|
274
290
|
typedef __fp16 ggml_fp16_t;
|
275
291
|
#else
|
276
292
|
typedef uint16_t ggml_fp16_t;
|
@@ -437,6 +453,12 @@ extern "C" {
|
|
437
453
|
GGML_OBJECT_WORK_BUFFER
|
438
454
|
};
|
439
455
|
|
456
|
+
enum ggml_log_level {
|
457
|
+
GGML_LOG_LEVEL_ERROR = 2,
|
458
|
+
GGML_LOG_LEVEL_WARN = 3,
|
459
|
+
GGML_LOG_LEVEL_INFO = 4
|
460
|
+
};
|
461
|
+
|
440
462
|
// ggml object
|
441
463
|
struct ggml_object {
|
442
464
|
size_t offs;
|
@@ -459,8 +481,8 @@ extern "C" {
|
|
459
481
|
int n_dims;
|
460
482
|
int64_t ne[GGML_MAX_DIMS]; // number of elements
|
461
483
|
size_t nb[GGML_MAX_DIMS]; // stride in bytes:
|
462
|
-
// nb[0] = sizeof(type)
|
463
|
-
// nb[1] = nb[0] * ne[0] + padding
|
484
|
+
// nb[0] = ggml_type_size(type)
|
485
|
+
// nb[1] = nb[0] * (ne[0] / ggml_blck_size(type)) + padding
|
464
486
|
// nb[i] = nb[i-1] * ne[i-1]
|
465
487
|
|
466
488
|
// compute data
|
@@ -512,7 +534,15 @@ extern "C" {
|
|
512
534
|
// next prime after GGML_MAX_NODES
|
513
535
|
// #define GGML_GRAPH_HASHTABLE_SIZE 4099
|
514
536
|
// next prime after GGML_MAX_NODES * 2 (nodes + leafs)
|
515
|
-
#define GGML_GRAPH_HASHTABLE_SIZE 8273
|
537
|
+
// #define GGML_GRAPH_HASHTABLE_SIZE 8273
|
538
|
+
// #define GGML_GRAPH_HASHTABLE_SIZE 16411
|
539
|
+
#define GGML_GRAPH_HASHTABLE_SIZE 32771
|
540
|
+
|
541
|
+
enum ggml_cgraph_eval_order {
|
542
|
+
GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
|
543
|
+
GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
|
544
|
+
GGML_CGRAPH_EVAL_ORDER_COUNT
|
545
|
+
};
|
516
546
|
|
517
547
|
// computation graph
|
518
548
|
struct ggml_cgraph {
|
@@ -525,6 +555,8 @@ extern "C" {
|
|
525
555
|
|
526
556
|
void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
|
527
557
|
|
558
|
+
enum ggml_cgraph_eval_order order;
|
559
|
+
|
528
560
|
// performance
|
529
561
|
int perf_runs;
|
530
562
|
int64_t perf_cycles;
|
@@ -672,12 +704,21 @@ extern "C" {
|
|
672
704
|
GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
|
673
705
|
GGML_API struct ggml_tensor * ggml_set_f32 (struct ggml_tensor * tensor, float value);
|
674
706
|
|
707
|
+
// Converts a flat index into coordinates
|
708
|
+
GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3);
|
709
|
+
|
675
710
|
GGML_API int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i);
|
676
711
|
GGML_API void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value);
|
677
712
|
|
713
|
+
GGML_API int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
714
|
+
GGML_API void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value);
|
715
|
+
|
678
716
|
GGML_API float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i);
|
679
717
|
GGML_API void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value);
|
680
718
|
|
719
|
+
GGML_API float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3);
|
720
|
+
GGML_API void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value);
|
721
|
+
|
681
722
|
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
|
682
723
|
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
|
683
724
|
|
@@ -685,6 +726,7 @@ extern "C" {
|
|
685
726
|
|
686
727
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
687
728
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
729
|
+
GGML_ATTRIBUTE_FORMAT(2, 3)
|
688
730
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
689
731
|
|
690
732
|
//
|
@@ -710,6 +752,12 @@ extern "C" {
|
|
710
752
|
struct ggml_tensor * a,
|
711
753
|
struct ggml_tensor * b);
|
712
754
|
|
755
|
+
GGML_API struct ggml_tensor * ggml_add_cast(
|
756
|
+
struct ggml_context * ctx,
|
757
|
+
struct ggml_tensor * a,
|
758
|
+
struct ggml_tensor * b,
|
759
|
+
enum ggml_type type);
|
760
|
+
|
713
761
|
GGML_API struct ggml_tensor * ggml_add1(
|
714
762
|
struct ggml_context * ctx,
|
715
763
|
struct ggml_tensor * a,
|
@@ -819,6 +867,7 @@ extern "C" {
|
|
819
867
|
struct ggml_tensor * a,
|
820
868
|
struct ggml_tensor * b);
|
821
869
|
|
870
|
+
// sums repetitions in a into shape of b
|
822
871
|
GGML_API struct ggml_tensor * ggml_repeat_back(
|
823
872
|
struct ggml_context * ctx,
|
824
873
|
struct ggml_tensor * a,
|
@@ -1040,7 +1089,6 @@ extern "C" {
|
|
1040
1089
|
size_t nb1,
|
1041
1090
|
size_t offset);
|
1042
1091
|
|
1043
|
-
|
1044
1092
|
// a -> b, return view(b)
|
1045
1093
|
GGML_API struct ggml_tensor * ggml_cpy(
|
1046
1094
|
struct ggml_context * ctx,
|
@@ -1063,6 +1111,33 @@ extern "C" {
|
|
1063
1111
|
struct ggml_context * ctx,
|
1064
1112
|
struct ggml_tensor * a);
|
1065
1113
|
|
1114
|
+
// make contiguous, with new shape
|
1115
|
+
GGML_API struct ggml_tensor * ggml_cont_1d(
|
1116
|
+
struct ggml_context * ctx,
|
1117
|
+
struct ggml_tensor * a,
|
1118
|
+
int64_t ne0);
|
1119
|
+
|
1120
|
+
GGML_API struct ggml_tensor * ggml_cont_2d(
|
1121
|
+
struct ggml_context * ctx,
|
1122
|
+
struct ggml_tensor * a,
|
1123
|
+
int64_t ne0,
|
1124
|
+
int64_t ne1);
|
1125
|
+
|
1126
|
+
GGML_API struct ggml_tensor * ggml_cont_3d(
|
1127
|
+
struct ggml_context * ctx,
|
1128
|
+
struct ggml_tensor * a,
|
1129
|
+
int64_t ne0,
|
1130
|
+
int64_t ne1,
|
1131
|
+
int64_t ne2);
|
1132
|
+
|
1133
|
+
GGML_API struct ggml_tensor * ggml_cont_4d(
|
1134
|
+
struct ggml_context * ctx,
|
1135
|
+
struct ggml_tensor * a,
|
1136
|
+
int64_t ne0,
|
1137
|
+
int64_t ne1,
|
1138
|
+
int64_t ne2,
|
1139
|
+
int64_t ne3);
|
1140
|
+
|
1066
1141
|
// return view(a), b specifies the new shape
|
1067
1142
|
// TODO: when we start computing gradient, make a copy instead of view
|
1068
1143
|
GGML_API struct ggml_tensor * ggml_reshape(
|
@@ -1210,14 +1285,15 @@ extern "C" {
|
|
1210
1285
|
struct ggml_tensor * b);
|
1211
1286
|
|
1212
1287
|
// rotary position embedding
|
1213
|
-
// if mode & 1 == 1, skip n_past elements
|
1288
|
+
// if mode & 1 == 1, skip n_past elements (DEPRECATED)
|
1214
1289
|
// if mode & 2 == 1, GPT-NeoX style
|
1215
1290
|
// if mode & 4 == 1, ChatGLM style
|
1216
|
-
//
|
1291
|
+
//
|
1292
|
+
// b is an int32 vector with size a->ne[2], it contains the positions
|
1217
1293
|
GGML_API struct ggml_tensor * ggml_rope(
|
1218
1294
|
struct ggml_context * ctx,
|
1219
1295
|
struct ggml_tensor * a,
|
1220
|
-
int n_past,
|
1296
|
+
struct ggml_tensor * b,
|
1221
1297
|
int n_dims,
|
1222
1298
|
int mode,
|
1223
1299
|
int n_ctx);
|
@@ -1226,7 +1302,7 @@ extern "C" {
|
|
1226
1302
|
GGML_API struct ggml_tensor * ggml_rope_inplace(
|
1227
1303
|
struct ggml_context * ctx,
|
1228
1304
|
struct ggml_tensor * a,
|
1229
|
-
int n_past,
|
1305
|
+
struct ggml_tensor * b,
|
1230
1306
|
int n_dims,
|
1231
1307
|
int mode,
|
1232
1308
|
int n_ctx);
|
@@ -1235,7 +1311,7 @@ extern "C" {
|
|
1235
1311
|
GGML_API struct ggml_tensor * ggml_rope_custom(
|
1236
1312
|
struct ggml_context * ctx,
|
1237
1313
|
struct ggml_tensor * a,
|
1238
|
-
int n_past,
|
1314
|
+
struct ggml_tensor * b,
|
1239
1315
|
int n_dims,
|
1240
1316
|
int mode,
|
1241
1317
|
int n_ctx,
|
@@ -1246,7 +1322,7 @@ extern "C" {
|
|
1246
1322
|
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
1247
1323
|
struct ggml_context * ctx,
|
1248
1324
|
struct ggml_tensor * a,
|
1249
|
-
int n_past,
|
1325
|
+
struct ggml_tensor * b,
|
1250
1326
|
int n_dims,
|
1251
1327
|
int mode,
|
1252
1328
|
int n_ctx,
|
@@ -1257,7 +1333,7 @@ extern "C" {
|
|
1257
1333
|
GGML_API struct ggml_tensor * ggml_rope_xpos_inplace(
|
1258
1334
|
struct ggml_context * ctx,
|
1259
1335
|
struct ggml_tensor * a,
|
1260
|
-
int n_past,
|
1336
|
+
struct ggml_tensor * b,
|
1261
1337
|
int n_dims,
|
1262
1338
|
float base,
|
1263
1339
|
bool down);
|
@@ -1267,7 +1343,7 @@ extern "C" {
|
|
1267
1343
|
GGML_API struct ggml_tensor * ggml_rope_back(
|
1268
1344
|
struct ggml_context * ctx,
|
1269
1345
|
struct ggml_tensor * a,
|
1270
|
-
int n_past,
|
1346
|
+
struct ggml_tensor * b,
|
1271
1347
|
int n_dims,
|
1272
1348
|
int mode,
|
1273
1349
|
int n_ctx,
|
@@ -1647,6 +1723,16 @@ extern "C" {
|
|
1647
1723
|
// dump the graph into a file using the dot format
|
1648
1724
|
GGML_API void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename);
|
1649
1725
|
|
1726
|
+
// build gradient checkpointing backward graph gb for gf using provided checkpoints
|
1727
|
+
// gb_tmp will contain original backward graph with rewritten backward process nodes,
|
1728
|
+
// but without the second forward pass nodes.
|
1729
|
+
GGML_API void ggml_build_backward_gradient_checkpointing(
|
1730
|
+
struct ggml_context * ctx,
|
1731
|
+
struct ggml_cgraph * gf,
|
1732
|
+
struct ggml_cgraph * gb,
|
1733
|
+
struct ggml_cgraph * gb_tmp,
|
1734
|
+
struct ggml_tensor * * checkpoints,
|
1735
|
+
int n_checkpoints);
|
1650
1736
|
//
|
1651
1737
|
// optimization
|
1652
1738
|
//
|
@@ -1681,7 +1767,8 @@ extern "C" {
|
|
1681
1767
|
GGML_LINESEARCH_INVALID_PARAMETERS,
|
1682
1768
|
};
|
1683
1769
|
|
1684
|
-
typedef void (*ggml_opt_callback)(void * data, float * sched);
|
1770
|
+
typedef void (*ggml_opt_callback)(void * data, int accum_step, float * sched, bool * cancel);
|
1771
|
+
typedef void (*ggml_log_callback)(enum ggml_log_level level, const char * text, void * user_data);
|
1685
1772
|
|
1686
1773
|
// optimization parameters
|
1687
1774
|
//
|
@@ -1712,6 +1799,8 @@ extern "C" {
|
|
1712
1799
|
bool print_forward_graph;
|
1713
1800
|
bool print_backward_graph;
|
1714
1801
|
|
1802
|
+
int n_gradient_accumulation;
|
1803
|
+
|
1715
1804
|
// ADAM parameters
|
1716
1805
|
struct {
|
1717
1806
|
int n_iter;
|
@@ -1757,6 +1846,7 @@ extern "C" {
|
|
1757
1846
|
float loss_after;
|
1758
1847
|
|
1759
1848
|
struct {
|
1849
|
+
struct ggml_tensor * g; // current gradient
|
1760
1850
|
struct ggml_tensor * m; // first moment
|
1761
1851
|
struct ggml_tensor * v; // second moment
|
1762
1852
|
struct ggml_tensor * pf; // past function values
|
@@ -1866,39 +1956,39 @@ extern "C" {
|
|
1866
1956
|
|
1867
1957
|
GGML_API const char * gguf_type_name(enum gguf_type type);
|
1868
1958
|
|
1869
|
-
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
1870
|
-
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
1871
|
-
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
1872
|
-
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
1873
|
-
|
1874
|
-
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
1875
|
-
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
1876
|
-
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
1877
|
-
|
1878
|
-
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
1879
|
-
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
1880
|
-
|
1881
|
-
// results are undefined if the wrong type is used for the key
|
1882
|
-
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
1883
|
-
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
1884
|
-
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
1885
|
-
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
1886
|
-
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
1887
|
-
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
1888
|
-
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
1889
|
-
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
1890
|
-
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
1891
|
-
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
1892
|
-
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
1893
|
-
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
1894
|
-
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
1895
|
-
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
1896
|
-
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
1897
|
-
|
1898
|
-
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
1899
|
-
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
1900
|
-
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
1901
|
-
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
1959
|
+
GGML_API int gguf_get_version (const struct gguf_context * ctx);
|
1960
|
+
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
1961
|
+
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
1962
|
+
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
1963
|
+
|
1964
|
+
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
1965
|
+
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
1966
|
+
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int key_id);
|
1967
|
+
|
1968
|
+
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int key_id);
|
1969
|
+
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int key_id);
|
1970
|
+
|
1971
|
+
// will abort if the wrong type is used for the key
|
1972
|
+
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int key_id);
|
1973
|
+
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int key_id);
|
1974
|
+
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int key_id);
|
1975
|
+
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int key_id);
|
1976
|
+
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int key_id);
|
1977
|
+
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int key_id);
|
1978
|
+
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int key_id);
|
1979
|
+
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int key_id);
|
1980
|
+
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int key_id);
|
1981
|
+
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int key_id);
|
1982
|
+
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int key_id);
|
1983
|
+
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int key_id);
|
1984
|
+
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int key_id);
|
1985
|
+
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int key_id);
|
1986
|
+
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
1987
|
+
|
1988
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
1989
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
1990
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
1991
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
1902
1992
|
|
1903
1993
|
// overrides existing values or adds a new one
|
1904
1994
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
@@ -1943,11 +2033,11 @@ extern "C" {
|
|
1943
2033
|
//
|
1944
2034
|
|
1945
2035
|
// write the entire context to a binary file
|
1946
|
-
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
2036
|
+
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
1947
2037
|
|
1948
2038
|
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
1949
|
-
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
1950
|
-
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
2039
|
+
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
2040
|
+
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
1951
2041
|
|
1952
2042
|
//
|
1953
2043
|
// system info
|
@@ -1961,6 +2051,7 @@ extern "C" {
|
|
1961
2051
|
GGML_API int ggml_cpu_has_fma (void);
|
1962
2052
|
GGML_API int ggml_cpu_has_neon (void);
|
1963
2053
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
2054
|
+
GGML_API int ggml_cpu_has_metal (void);
|
1964
2055
|
GGML_API int ggml_cpu_has_f16c (void);
|
1965
2056
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
1966
2057
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|