llama_cpp 0.14.2 → 0.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/ext/llama_cpp/llama_cpp.cpp +64 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +6 -0
- data/vendor/tmp/llama.cpp/Makefile +91 -21
- data/vendor/tmp/llama.cpp/ggml-alloc.c +14 -5
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +5 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +155 -125
- data/vendor/tmp/llama.cpp/ggml-backend.h +4 -4
- data/vendor/tmp/llama.cpp/ggml-common.h +25 -2
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +1779 -10762
- data/vendor/tmp/llama.cpp/ggml-cuda.h +6 -15
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +167 -124
- data/vendor/tmp/llama.cpp/ggml-metal.metal +603 -303
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +5 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +663 -56
- data/vendor/tmp/llama.cpp/ggml-quants.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +341 -469
- data/vendor/tmp/llama.cpp/ggml-sycl.h +19 -4
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +37199 -14939
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +335 -307
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +0 -11
- data/vendor/tmp/llama.cpp/ggml.c +229 -107
- data/vendor/tmp/llama.cpp/ggml.h +11 -5
- data/vendor/tmp/llama.cpp/llama.cpp +2136 -464
- data/vendor/tmp/llama.cpp/llama.h +86 -23
- data/vendor/tmp/llama.cpp/unicode-data.cpp +1651 -0
- data/vendor/tmp/llama.cpp/unicode-data.h +16 -0
- data/vendor/tmp/llama.cpp/unicode.cpp +8 -1403
- data/vendor/tmp/llama.cpp/unicode.h +2 -0
- metadata +5 -3
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -214,9 +214,10 @@
|
|
214
214
|
# define GGML_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
215
215
|
#endif
|
216
216
|
|
217
|
-
#include <stdint.h>
|
218
|
-
#include <stddef.h>
|
219
217
|
#include <stdbool.h>
|
218
|
+
#include <stddef.h>
|
219
|
+
#include <stdint.h>
|
220
|
+
#include <stdio.h>
|
220
221
|
|
221
222
|
#define GGML_FILE_MAGIC 0x67676d6c // "ggml"
|
222
223
|
#define GGML_FILE_VERSION 1
|
@@ -368,6 +369,7 @@ extern "C" {
|
|
368
369
|
GGML_TYPE_I32 = 26,
|
369
370
|
GGML_TYPE_I64 = 27,
|
370
371
|
GGML_TYPE_F64 = 28,
|
372
|
+
GGML_TYPE_IQ1_M = 29,
|
371
373
|
GGML_TYPE_COUNT,
|
372
374
|
};
|
373
375
|
|
@@ -407,6 +409,7 @@ extern "C" {
|
|
407
409
|
GGML_FTYPE_MOSTLY_IQ3_S = 20, // except 1d tensors
|
408
410
|
GGML_FTYPE_MOSTLY_IQ2_S = 21, // except 1d tensors
|
409
411
|
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
412
|
+
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
410
413
|
};
|
411
414
|
|
412
415
|
// available tensor operations:
|
@@ -708,6 +711,9 @@ extern "C" {
|
|
708
711
|
|
709
712
|
GGML_API void ggml_print_backtrace(void);
|
710
713
|
|
714
|
+
// accepts a UTF-8 path, even on Windows
|
715
|
+
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
|
716
|
+
|
711
717
|
GGML_API void ggml_numa_init(enum ggml_numa_strategy numa); // call once for better performance on NUMA systems
|
712
718
|
GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node
|
713
719
|
|
@@ -744,6 +750,7 @@ extern "C" {
|
|
744
750
|
GGML_API GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor);
|
745
751
|
GGML_API GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
746
752
|
GGML_API GGML_CALL bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
753
|
+
GGML_API GGML_CALL bool ggml_is_empty (const struct ggml_tensor * tensor);
|
747
754
|
GGML_API bool ggml_is_scalar (const struct ggml_tensor * tensor);
|
748
755
|
GGML_API bool ggml_is_vector (const struct ggml_tensor * tensor);
|
749
756
|
GGML_API bool ggml_is_matrix (const struct ggml_tensor * tensor);
|
@@ -1157,8 +1164,7 @@ extern "C" {
|
|
1157
1164
|
// ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
|
1158
1165
|
GGML_API struct ggml_tensor * ggml_mul_mat_id(
|
1159
1166
|
struct ggml_context * ctx,
|
1160
|
-
struct ggml_tensor * const as[],
|
1161
|
-
int n_as,
|
1167
|
+
struct ggml_tensor * as,
|
1162
1168
|
struct ggml_tensor * ids,
|
1163
1169
|
int id,
|
1164
1170
|
struct ggml_tensor * b);
|
@@ -2350,7 +2356,7 @@ extern "C" {
|
|
2350
2356
|
GGML_API int ggml_cpu_has_fp16_va (void);
|
2351
2357
|
GGML_API int ggml_cpu_has_wasm_simd (void);
|
2352
2358
|
GGML_API int ggml_cpu_has_blas (void);
|
2353
|
-
GGML_API int ggml_cpu_has_cublas (void);
|
2359
|
+
GGML_API int ggml_cpu_has_cuda (void);
|
2354
2360
|
GGML_API int ggml_cpu_has_clblast (void);
|
2355
2361
|
GGML_API int ggml_cpu_has_vulkan (void);
|
2356
2362
|
GGML_API int ggml_cpu_has_kompute (void);
|