llama_cpp 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -2
- data/ext/llama_cpp/src/ggml-alloc.c +6 -6
- data/ext/llama_cpp/src/ggml-cuda.cu +99 -46
- data/ext/llama_cpp/src/ggml-metal.m +37 -10
- data/ext/llama_cpp/src/ggml-metal.metal +144 -45
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +68 -40
- data/ext/llama_cpp/src/ggml.h +43 -33
- data/ext/llama_cpp/src/llama.cpp +420 -57
- data/ext/llama_cpp/src/llama.h +5 -1
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -195,6 +195,14 @@
 # define GGML_DEPRECATED(func, hint) func
 #endif
 
+#ifndef __GNUC__
+# define GGML_ATTRIBUTE_FORMAT(...)
+#elif defined(__MINGW32__)
+# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
+#else
+# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
+#endif
+
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
@@ -270,7 +278,7 @@ extern "C" {
 
 #if defined(__ARM_NEON) && defined(__CUDACC__)
 typedef half ggml_fp16_t;
-#elif defined(__ARM_NEON)
+#elif defined(__ARM_NEON)
 typedef __fp16 ggml_fp16_t;
 #else
 typedef uint16_t ggml_fp16_t;
@@ -685,6 +693,7 @@ extern "C" {
 
 GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
 GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
+GGML_ATTRIBUTE_FORMAT(2, 3)
 GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
 
 //
@@ -1866,39 +1875,39 @@ extern "C" {
 
 GGML_API const char * gguf_type_name(enum gguf_type type);
 
-GGML_API int gguf_get_version (struct gguf_context * ctx);
-GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
-GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
-GGML_API void * gguf_get_data (struct gguf_context * ctx);
+GGML_API int gguf_get_version (const struct gguf_context * ctx);
+GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
+GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
+GGML_API void * gguf_get_data (const struct gguf_context * ctx);
 
-GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
-GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
-GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
+GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
+GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
+GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
 
-GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
-GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
+GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
+GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
 
 // results are undefined if the wrong type is used for the key
-GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
-GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
-GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
-GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
-GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
-GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
-GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
-GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
-GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
-GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
-GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
-GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
-GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
-GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
-GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
-
-GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
-GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
-GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
-GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
+GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
+GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
+GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
+GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
+GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
+GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
+GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
+GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
+GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
+GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
+GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
+GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
+GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
+GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
+GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
+
+GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
+GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
+GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
+GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
 
 // overrides existing values or adds a new one
 GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
@@ -1943,11 +1952,11 @@ extern "C" {
 //
 
 // write the entire context to a binary file
-GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
+GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
 
 // get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
-GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
-GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
+GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
+GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
 
 //
 // system info
@@ -1961,6 +1970,7 @@ extern "C" {
 GGML_API int ggml_cpu_has_fma (void);
 GGML_API int ggml_cpu_has_neon (void);
 GGML_API int ggml_cpu_has_arm_fma (void);
+GGML_API int ggml_cpu_has_metal (void);
 GGML_API int ggml_cpu_has_f16c (void);
 GGML_API int ggml_cpu_has_fp16_va (void);
 GGML_API int ggml_cpu_has_wasm_simd (void);