llama_cpp 0.14.2 → 0.14.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/ext/llama_cpp/llama_cpp.cpp +64 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -0
- data/vendor/tmp/llama.cpp/Makefile +91 -21
- data/vendor/tmp/llama.cpp/ggml-alloc.c +14 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +155 -125
- data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
- data/vendor/tmp/llama.cpp/ggml-common.h +25 -2
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +1779 -10762
- data/vendor/tmp/llama.cpp/ggml-cuda.h +6 -15
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +167 -124
- data/vendor/tmp/llama.cpp/ggml-metal.metal +603 -303
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +663 -56
- data/vendor/tmp/llama.cpp/ggml-quants.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +341 -469
- data/vendor/tmp/llama.cpp/ggml-sycl.h +19 -4
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +37199 -14939
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +335 -307
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -11
- data/vendor/tmp/llama.cpp/ggml.c +229 -107
- data/vendor/tmp/llama.cpp/ggml.h +11 -5
- data/vendor/tmp/llama.cpp/llama.cpp +2136 -464
- data/vendor/tmp/llama.cpp/llama.h +86 -23
- data/vendor/tmp/llama.cpp/unicode-data.cpp +1651 -0
- data/vendor/tmp/llama.cpp/unicode-data.h +16 -0
- data/vendor/tmp/llama.cpp/unicode.cpp +8 -1403
- data/vendor/tmp/llama.cpp/unicode.h +2 -0
- metadata +5 -3
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -214,9 +214,10 @@
|
|
214
214
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
215
215
|
#endif
|
216
216
|
|
217
|
-
#include <stdint.h>
|
218
|
-
#include <stddef.h>
|
219
217
|
#include <stdbool.h>
|
218
|
+
#include <stddef.h>
|
219
|
+
#include <stdint.h>
|
220
|
+
#include <stdio.h>
|
220
221
|
|
221
222
|
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
222
223
|
#define GGML_FILE_VERSION 1
|
@@ -368,6 +369,7 @@ extern "C" {
|
|
368
369
|
GGML_TYPE_I32 = 26,
|
369
370
|
GGML_TYPE_I64 = 27,
|
370
371
|
GGML_TYPE_F64 = 28,
|
372
|
+
GGML_TYPE_IQ1_M = 29,
|
371
373
|
GGML_TYPE_COUNT,
|
372
374
|
};
|
373
375
|
|
@@ -407,6 +409,7 @@ extern "C" {
|
|
407
409
|
GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
|
408
410
|
GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
|
409
411
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
412
|
+
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
410
413
|
};
|
411
414
|
|
412
415
|
// available tensor operations:
|
@@ -708,6 +711,9 @@ extern "C" {
|
|
708
711
|
|
709
712
|
GGML_API void ggml_print_backtrace(void);
|
710
713
|
|
714
|
+
// accepts a UTF-8 path, even on Windows
|
715
|
+
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
|
716
|
+
|
711
717
|
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
712
718
|
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
713
719
|
|
@@ -744,6 +750,7 @@ extern "C" {
|
|
744
750
|
GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
745
751
|
GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
746
752
|
GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
753
|
+
GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
|
747
754
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
748
755
|
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
749
756
|
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
@@ -1157,8 +1164,7 @@ extern "C" {
|
|
1157
1164
|
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1158
1165
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1159
1166
|
struct ggml_context * ctx,
|
1160
|
-
struct ggml_tensor * const as[],
|
1161
|
-
int n_as,
|
1167
|
+
struct ggml_tensor * as,
|
1162
1168
|
struct ggml_tensor * ids,
|
1163
1169
|
int id,
|
1164
1170
|
struct ggml_tensor * b);
|
@@ -2350,7 +2356,7 @@ extern "C" {
|
|
2350
2356
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
2351
2357
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
2352
2358
|
GGML_API int ggml_cpu_has_blas (void);
|
2353
|
-
GGML_API int ggml_cpu_has_cublas (void);
|
2359
|
+
GGML_API int ggml_cpu_has_cuda (void);
|
2354
2360
|
GGML_API int ggml_cpu_has_clblast (void);
|
2355
2361
|
GGML_API int ggml_cpu_has_vulkan (void);
|
2356
2362
|
GGML_API int ggml_cpu_has_kompute (void);
|