llama_cpp 0.5.2 → 0.5.3
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/llama_cpp/llama_cpp.cpp +2 -2
- data/ext/llama_cpp/src/ggml-alloc.c +6 -6
- data/ext/llama_cpp/src/ggml-cuda.cu +99 -46
- data/ext/llama_cpp/src/ggml-metal.m +37 -10
- data/ext/llama_cpp/src/ggml-metal.metal +144 -45
- data/ext/llama_cpp/src/ggml-opencl.cpp +3 -3
- data/ext/llama_cpp/src/ggml.c +68 -40
- data/ext/llama_cpp/src/ggml.h +43 -33
- data/ext/llama_cpp/src/llama.cpp +420 -57
- data/ext/llama_cpp/src/llama.h +5 -1
- data/lib/llama_cpp/version.rb +2 -2
- metadata +2 -2
data/ext/llama_cpp/src/ggml.h
CHANGED
@@ -195,6 +195,14 @@
|
|
195
195
|
# define GGML_DEPRECATED(func, hint) func
|
196
196
|
#endif
|
197
197
|
|
198
|
+
#ifndef __GNUC__
|
199
|
+
# define GGML_ATTRIBUTE_FORMAT(...)
|
200
|
+
#elif defined(__MINGW32__)
|
201
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
202
|
+
#else
|
203
|
+
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
204
|
+
#endif
|
205
|
+
|
198
206
|
#include <stdint.h>
|
199
207
|
#include <stddef.h>
|
200
208
|
#include <stdbool.h>
|
@@ -270,7 +278,7 @@ extern "C" {
|
|
270
278
|
|
271
279
|
#if defined(__ARM_NEON) && defined(__CUDACC__)
|
272
280
|
typedef half ggml_fp16_t;
|
273
|
-
#elif defined(__ARM_NEON)
|
281
|
+
#elif defined(__ARM_NEON)
|
274
282
|
typedef __fp16 ggml_fp16_t;
|
275
283
|
#else
|
276
284
|
typedef uint16_t ggml_fp16_t;
|
@@ -685,6 +693,7 @@ extern "C" {
|
|
685
693
|
|
686
694
|
GGML_API const char * ggml_get_name (const struct ggml_tensor * tensor);
|
687
695
|
GGML_API struct ggml_tensor * ggml_set_name ( struct ggml_tensor * tensor, const char * name);
|
696
|
+
GGML_ATTRIBUTE_FORMAT(2, 3)
|
688
697
|
GGML_API struct ggml_tensor * ggml_format_name( struct ggml_tensor * tensor, const char * fmt, ...);
|
689
698
|
|
690
699
|
//
|
@@ -1866,39 +1875,39 @@ extern "C" {
|
|
1866
1875
|
|
1867
1876
|
GGML_API const char * gguf_type_name(enum gguf_type type);
|
1868
1877
|
|
1869
|
-
GGML_API int gguf_get_version (struct gguf_context * ctx);
|
1870
|
-
GGML_API size_t gguf_get_alignment (struct gguf_context * ctx);
|
1871
|
-
GGML_API size_t gguf_get_data_offset(struct gguf_context * ctx);
|
1872
|
-
GGML_API void * gguf_get_data (struct gguf_context * ctx);
|
1878
|
+
GGML_API int gguf_get_version (const struct gguf_context * ctx);
|
1879
|
+
GGML_API size_t gguf_get_alignment (const struct gguf_context * ctx);
|
1880
|
+
GGML_API size_t gguf_get_data_offset(const struct gguf_context * ctx);
|
1881
|
+
GGML_API void * gguf_get_data (const struct gguf_context * ctx);
|
1873
1882
|
|
1874
|
-
GGML_API int gguf_get_n_kv(struct gguf_context * ctx);
|
1875
|
-
GGML_API int gguf_find_key(struct gguf_context * ctx, const char * key);
|
1876
|
-
GGML_API const char * gguf_get_key (struct gguf_context * ctx, int i);
|
1883
|
+
GGML_API int gguf_get_n_kv(const struct gguf_context * ctx);
|
1884
|
+
GGML_API int gguf_find_key(const struct gguf_context * ctx, const char * key);
|
1885
|
+
GGML_API const char * gguf_get_key (const struct gguf_context * ctx, int i);
|
1877
1886
|
|
1878
|
-
GGML_API enum gguf_type gguf_get_kv_type (struct gguf_context * ctx, int i);
|
1879
|
-
GGML_API enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i);
|
1887
|
+
GGML_API enum gguf_type gguf_get_kv_type (const struct gguf_context * ctx, int i);
|
1888
|
+
GGML_API enum gguf_type gguf_get_arr_type(const struct gguf_context * ctx, int i);
|
1880
1889
|
|
1881
1890
|
// results are undefined if the wrong type is used for the key
|
1882
|
-
GGML_API uint8_t gguf_get_val_u8 (struct gguf_context * ctx, int i);
|
1883
|
-
GGML_API int8_t gguf_get_val_i8 (struct gguf_context * ctx, int i);
|
1884
|
-
GGML_API uint16_t gguf_get_val_u16 (struct gguf_context * ctx, int i);
|
1885
|
-
GGML_API int16_t gguf_get_val_i16 (struct gguf_context * ctx, int i);
|
1886
|
-
GGML_API uint32_t gguf_get_val_u32 (struct gguf_context * ctx, int i);
|
1887
|
-
GGML_API int32_t gguf_get_val_i32 (struct gguf_context * ctx, int i);
|
1888
|
-
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
|
1889
|
-
GGML_API uint64_t gguf_get_val_u64 (struct gguf_context * ctx, int i);
|
1890
|
-
GGML_API int64_t gguf_get_val_i64 (struct gguf_context * ctx, int i);
|
1891
|
-
GGML_API double gguf_get_val_f64 (struct gguf_context * ctx, int i);
|
1892
|
-
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
|
1893
|
-
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
|
1894
|
-
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
|
1895
|
-
GGML_API const void * gguf_get_arr_data(struct gguf_context * ctx, int i);
|
1896
|
-
GGML_API const char * gguf_get_arr_str (struct gguf_context * ctx, int key_id, int i);
|
1897
|
-
|
1898
|
-
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
|
1899
|
-
GGML_API int gguf_find_tensor (struct gguf_context * ctx, const char * name);
|
1900
|
-
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);
|
1901
|
-
GGML_API char * gguf_get_tensor_name (struct gguf_context * ctx, int i);
|
1891
|
+
GGML_API uint8_t gguf_get_val_u8 (const struct gguf_context * ctx, int i);
|
1892
|
+
GGML_API int8_t gguf_get_val_i8 (const struct gguf_context * ctx, int i);
|
1893
|
+
GGML_API uint16_t gguf_get_val_u16 (const struct gguf_context * ctx, int i);
|
1894
|
+
GGML_API int16_t gguf_get_val_i16 (const struct gguf_context * ctx, int i);
|
1895
|
+
GGML_API uint32_t gguf_get_val_u32 (const struct gguf_context * ctx, int i);
|
1896
|
+
GGML_API int32_t gguf_get_val_i32 (const struct gguf_context * ctx, int i);
|
1897
|
+
GGML_API float gguf_get_val_f32 (const struct gguf_context * ctx, int i);
|
1898
|
+
GGML_API uint64_t gguf_get_val_u64 (const struct gguf_context * ctx, int i);
|
1899
|
+
GGML_API int64_t gguf_get_val_i64 (const struct gguf_context * ctx, int i);
|
1900
|
+
GGML_API double gguf_get_val_f64 (const struct gguf_context * ctx, int i);
|
1901
|
+
GGML_API bool gguf_get_val_bool(const struct gguf_context * ctx, int i);
|
1902
|
+
GGML_API const char * gguf_get_val_str (const struct gguf_context * ctx, int i);
|
1903
|
+
GGML_API int gguf_get_arr_n (const struct gguf_context * ctx, int i);
|
1904
|
+
GGML_API const void * gguf_get_arr_data(const struct gguf_context * ctx, int i);
|
1905
|
+
GGML_API const char * gguf_get_arr_str (const struct gguf_context * ctx, int key_id, int i);
|
1906
|
+
|
1907
|
+
GGML_API int gguf_get_n_tensors (const struct gguf_context * ctx);
|
1908
|
+
GGML_API int gguf_find_tensor (const struct gguf_context * ctx, const char * name);
|
1909
|
+
GGML_API size_t gguf_get_tensor_offset(const struct gguf_context * ctx, int i);
|
1910
|
+
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
|
1902
1911
|
|
1903
1912
|
// overrides existing values or adds a new one
|
1904
1913
|
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
|
@@ -1943,11 +1952,11 @@ extern "C" {
|
|
1943
1952
|
//
|
1944
1953
|
|
1945
1954
|
// write the entire context to a binary file
|
1946
|
-
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname, bool only_meta);
|
1955
|
+
GGML_API void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
|
1947
1956
|
|
1948
1957
|
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding
|
1949
|
-
GGML_API size_t gguf_get_meta_size(struct gguf_context * ctx);
|
1950
|
-
GGML_API void gguf_get_meta_data(struct gguf_context * ctx, void * data);
|
1958
|
+
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
1959
|
+
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
1951
1960
|
|
1952
1961
|
//
|
1953
1962
|
// system info
|
@@ -1961,6 +1970,7 @@ extern "C" {
|
|
1961
1970
|
GGML_API int ggml_cpu_has_fma (void);
|
1962
1971
|
GGML_API int ggml_cpu_has_neon (void);
|
1963
1972
|
GGML_API int ggml_cpu_has_arm_fma (void);
|
1973
|
+
GGML_API int ggml_cpu_has_metal (void);
|
1964
1974
|
GGML_API int ggml_cpu_has_f16c (void);
|
1965
1975
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
1966
1976
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|