llama_cpp 0.12.4 → 0.12.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/ext/llama_cpp/llama_cpp.cpp +46 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +7 -0
- data/vendor/tmp/llama.cpp/Makefile +146 -53
- data/vendor/tmp/llama.cpp/ggml-alloc.c +563 -490
- data/vendor/tmp/llama.cpp/ggml-alloc.h +39 -65
- data/vendor/tmp/llama.cpp/ggml-backend.c +250 -262
- data/vendor/tmp/llama.cpp/ggml-backend.h +8 -12
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +2 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +386 -134
- data/vendor/tmp/llama.cpp/ggml-quants.h +68 -59
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +139 -145
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +1516 -10656
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +1777 -1238
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +14 -9
- data/vendor/tmp/llama.cpp/ggml.c +147 -70
- data/vendor/tmp/llama.cpp/ggml.h +26 -6
- data/vendor/tmp/llama.cpp/llama.cpp +920 -173
- data/vendor/tmp/llama.cpp/llama.h +7 -1
- data/vendor/tmp/llama.cpp/unicode.h +42 -30
- metadata +2 -2
data/vendor/tmp/llama.cpp/ggml.h
CHANGED
@@ -505,11 +505,17 @@ extern "C" {
 
     enum ggml_log_level {
         GGML_LOG_LEVEL_ERROR = 2,
-        GGML_LOG_LEVEL_WARN,
-        GGML_LOG_LEVEL_INFO,
+        GGML_LOG_LEVEL_WARN = 3,
+        GGML_LOG_LEVEL_INFO = 4,
         GGML_LOG_LEVEL_DEBUG = 5
     };
 
+    enum ggml_tensor_flag {
+        GGML_TENSOR_FLAG_INPUT = 1,
+        GGML_TENSOR_FLAG_OUTPUT = 2,
+        GGML_TENSOR_FLAG_PARAM = 4,
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
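With WARN and INFO pinned to explicit values, the severity ordering (lower value = more severe) becomes something callers can rely on. A minimal sketch of a threshold filter, assuming ggml.h's existing `ggml_log_callback` signature; the callback name and threshold scheme are illustrative:

```c
#include <stdio.h>
#include "ggml.h"

// Hypothetical filter: with ERROR=2 < WARN=3 < INFO=4 < DEBUG=5, a smaller
// value means a more severe message, so thresholding is a plain comparison.
static void log_filter_cb(enum ggml_log_level level, const char * text, void * user_data) {
    const enum ggml_log_level threshold = *(const enum ggml_log_level *) user_data;
    if (level <= threshold) { // keep messages at least this severe
        fputs(text, stderr);
    }
}
```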
@@ -543,7 +549,7 @@ extern "C" {
         // op params - allocated as int32_t for alignment
         int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 
-        bool is_param;
+        int32_t flags;
 
         struct ggml_tensor * grad;
         struct ggml_tensor * src[GGML_MAX_SRC];
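The `int32_t flags` field replaces what was a single-purpose boolean, so one tensor can carry several of the new `ggml_tensor_flag` roles at once. A sketch with hypothetical helper names:

```c
#include <stdbool.h>
#include "ggml.h"

// Hypothetical helpers: flags is a bitmask, so roles compose with | and
// are tested with &, unlike the old boolean field.
static bool tensor_is_param(const struct ggml_tensor * t) {
    return (t->flags & GGML_TENSOR_FLAG_PARAM) != 0;
}

static void mark_input_and_output(struct ggml_tensor * t) {
    t->flags |= GGML_TENSOR_FLAG_INPUT | GGML_TENSOR_FLAG_OUTPUT;
}
```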
@@ -567,6 +573,11 @@ extern "C" {
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
 
+    // Abort callback
+    // If not NULL, called before ggml computation
+    // If it returns true, the computation is aborted
+    typedef bool (*ggml_abort_callback)(void * data);
+
     // the compute plan that needs to be prepared for ggml_graph_compute()
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
@@ -576,8 +587,8 @@ extern "C" {
         int n_threads;
 
         // abort ggml_graph_compute when true
-        bool (*abort_callback)(void * data);
-        void * abort_callback_data;
+        ggml_abort_callback abort_callback;
+        void * abort_callback_data;
     };
 
     enum ggml_cgraph_eval_order {
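These two `ggml_cplan` members now use the named `ggml_abort_callback` type declared above. A sketch of cooperative cancellation built on the declarations in this header; the deadline mechanism and five-second budget are illustrative, not part of the API:

```c
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include "ggml.h"

// Returns true once the wall-clock deadline has passed, which aborts
// ggml_graph_compute mid-graph.
static bool abort_after_deadline(void * data) {
    return time(NULL) >= *(const time_t *) data;
}

static int compute_with_timeout(struct ggml_cgraph * graph, int n_threads) {
    struct ggml_cplan plan = ggml_graph_plan(graph, n_threads);
    if (plan.work_size > 0) {
        plan.work_data = malloc(plan.work_size); // plan still needs its work buffer
    }
    time_t deadline = time(NULL) + 5;            // illustrative 5-second budget
    plan.abort_callback      = abort_after_deadline;
    plan.abort_callback_data = &deadline;
    int status = ggml_graph_compute(graph, &plan);
    free(plan.work_data);
    return status;
}
```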
@@ -2087,6 +2098,12 @@ extern "C" {
             ggml_opt_callback callback,
             void * callback_data);
 
+    //
+    // tensor flags
+    //
+    GGML_API void ggml_set_input(struct ggml_tensor * tensor);
+    GGML_API void ggml_set_output(struct ggml_tensor * tensor);
+
     //
     // quantization
     //
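`ggml_set_input` and `ggml_set_output` are the public setters for the new tensor flags, marking which tensors a graph consumes and produces. A sketch with placeholder shapes:

```c
#include "ggml.h"

// Sketch: marking graph boundaries while building a graph, so allocators
// and schedulers know these tensors' data must stay valid. Shapes are
// arbitrary placeholders.
static void build_graph_example(struct ggml_context * ctx, struct ggml_cgraph * gf) {
    struct ggml_tensor * inp = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
    ggml_set_input(inp);                      // sets GGML_TENSOR_FLAG_INPUT

    struct ggml_tensor * w   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 16);
    struct ggml_tensor * out = ggml_mul_mat(ctx, w, inp);
    ggml_set_output(out);                     // sets GGML_TENSOR_FLAG_OUTPUT

    ggml_build_forward_expand(gf, out);
}
```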
@@ -2273,6 +2290,7 @@ extern "C" {
     GGML_API int ggml_cpu_has_ssse3      (void);
     GGML_API int ggml_cpu_has_sycl       (void);
     GGML_API int ggml_cpu_has_vsx        (void);
+    GGML_API int ggml_cpu_has_matmul_int8(void);
 
     //
     // Internal types and functions exposed for tests and benchmarks
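The new probe follows the existing `ggml_cpu_has_*` pattern and reports whether int8 matrix-multiply instructions (e.g. Arm i8mm) are available, which is what the widened `ggml_vec_dot_t` below exists to exploit. A trivial usage sketch:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // Non-zero when ggml was built with int8 matmul support and the CPU has it.
    printf("matmul_int8: %d\n", ggml_cpu_has_matmul_int8());
    return 0;
}
```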
@@ -2286,7 +2304,8 @@ extern "C" {
 #endif
     typedef void (*ggml_to_float_t)  (const void  * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
     typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void  * GGML_RESTRICT y, int k);
-    typedef void (*ggml_vec_dot_t)   (const int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT x, const void * GGML_RESTRICT y);
+    typedef void (*ggml_vec_dot_t)   (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
+                                      const void * GGML_RESTRICT y, size_t by, int nrc);
 
     typedef struct {
         const char * type_name;
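The widened `ggml_vec_dot_t` takes byte strides (`bs`, `bx`, `by`) and a row count `nrc`, letting one call produce several dot products. A hypothetical F32 kernel matching the new signature; advancing all three pointers by their strides per row is an assumption made for illustration, not necessarily the convention ggml's own kernels use:

```c
#include <stddef.h>
#include "ggml.h"

// Hypothetical kernel: produces nrc dot products of length n, stepping the
// result and the two operands by their byte strides between rows.
static void vec_dot_f32_rows(int n, float * GGML_RESTRICT s, size_t bs,
                             const void * GGML_RESTRICT x, size_t bx,
                             const void * GGML_RESTRICT y, size_t by, int nrc) {
    for (int r = 0; r < nrc; ++r) {
        const float * xr = (const float *) ((const char *) x + r*bx);
        const float * yr = (const float *) ((const char *) y + r*by);
        float sum = 0.0f;
        for (int i = 0; i < n; ++i) {
            sum += xr[i] * yr[i];
        }
        *(float *) ((char *) s + r*bs) = sum;
    }
}
```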
@@ -2298,6 +2317,7 @@ extern "C" {
         ggml_from_float_t from_float_reference;
         ggml_vec_dot_t    vec_dot;
         enum ggml_type    vec_dot_type;
+        int64_t           nrows; // number of rows to process simultaneously;
     } ggml_type_traits_t;
 
     GGML_API ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type);
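The new `nrows` field advertises how many rows a type's `vec_dot` handles per call (more than one on the int8-matmul paths). A quick query through the accessor declared on the last line above; treating Q4_0 as the type of interest is arbitrary:

```c
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // nrows > 1 indicates a multi-row kernel, e.g. on CPUs with int8 matmul.
    ggml_type_traits_t traits = ggml_internal_get_type_traits(GGML_TYPE_Q4_0);
    printf("q4_0: vec_dot processes %lld row(s) per call\n", (long long) traits.nrows);
    return 0;
}
```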