llama_cpp 0.12.3 → 0.12.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +22 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -2
- data/vendor/tmp/llama.cpp/Makefile +23 -4
- data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +706 -15
- data/vendor/tmp/llama.cpp/ggml-quants.h +17 -1
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15255 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +60854 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5270 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +34 -0
- data/vendor/tmp/llama.cpp/ggml.c +350 -57
- data/vendor/tmp/llama.cpp/ggml.h +7 -1
- data/vendor/tmp/llama.cpp/llama.cpp +574 -39
- data/vendor/tmp/llama.cpp/llama.h +11 -15
- metadata +9 -2
data/vendor/tmp/llama.cpp/llama.h
CHANGED
@@ -3,12 +3,7 @@
 
 #include "ggml.h"
 #include "ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
-#include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
-#else
-#define LLAMA_MAX_DEVICES 1
-#endif // GGML_USE_CUBLAS
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -46,11 +41,6 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 4
 
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
-// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
-#define LLAMA_SUPPORTS_GPU_OFFLOAD
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
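With the LLAMA_SUPPORTS_GPU_OFFLOAD macro removed, downstream code can no longer gate behaviour at compile time and instead queries the new llama_supports_gpu_offload() function declared further down in this header. A minimal migration sketch in C (choose_gpu_layers and the requested-layers logic are hypothetical application code, not part of the library):

  #include <stdio.h>
  #include "llama.h"

  /* Previously wrapped in #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD ... #endif;
   * the capability is now checked at runtime instead. */
  static int choose_gpu_layers(int requested) {
      if (!llama_supports_gpu_offload()) {
          fprintf(stderr, "warning: this build cannot offload layers to the GPU\n");
          return 0;
      }
      return requested;
  }

The returned count would typically feed the n_gpu_layers field of llama_model_params before loading the model.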
@@ -108,6 +98,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
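The new LLAMA_FTYPE_MOSTLY_IQ3_XXS entry can be requested through llama.cpp's existing quantization entry points; a rough sketch, assuming llama_model_quantize_default_params()/llama_model_quantize() behave as in upstream llama.cpp, with placeholder file names:

  #include "llama.h"

  /* Sketch: re-quantize a GGUF model to the new IQ3_XXS type.
   * "input.gguf" and "output-iq3_xxs.gguf" are placeholder paths. */
  static int quantize_to_iq3_xxs(void) {
      llama_model_quantize_params params = llama_model_quantize_default_params();
      params.ftype = LLAMA_FTYPE_MOSTLY_IQ3_XXS;
      return (int) llama_model_quantize("input.gguf", "output-iq3_xxs.gguf", &params);
  }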
@@ -196,7 +187,7 @@ extern "C" {
         // LLAMA_SPLIT_LAYER: ignored
         int32_t main_gpu;
 
-        // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+        // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         const float * tensor_split;
 
         // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
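Because LLAMA_MAX_DEVICES is gone, a tensor_split table now has to be sized from llama_max_devices() at runtime rather than declared with a fixed bound. A minimal sketch (the even split is only an illustration):

  #include <stdlib.h>
  #include "llama.h"

  /* Sketch: build an even per-device split; the array length comes from
   * llama_max_devices() instead of the removed LLAMA_MAX_DEVICES constant. */
  static float * make_even_tensor_split(void) {
      const size_t n = llama_max_devices();
      float * split = calloc(n, sizeof(float));
      if (split == NULL) return NULL;
      for (size_t i = 0; i < n; ++i) {
          split[i] = 1.0f / (float) n;
      }
      return split;
  }

The resulting pointer is what would be assigned to the tensor_split field of llama_model_params.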
@@ -333,9 +324,14 @@ extern "C" {
 
     LLAMA_API int64_t llama_time_us(void);
 
-    LLAMA_API int  llama_max_devices(void);
-    LLAMA_API bool llama_mmap_supported (void);
-    LLAMA_API bool llama_mlock_supported(void);
+    LLAMA_API size_t llama_max_devices(void);
+
+    LLAMA_API bool llama_supports_mmap (void);
+    LLAMA_API bool llama_supports_mlock (void);
+    LLAMA_API bool llama_supports_gpu_offload(void);
+
+    LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
+    LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
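The old llama_mmap_supported()/llama_mlock_supported() helpers and the capability macros are superseded by the functions above; a short sketch of a startup report using only the new API (the log format is illustrative):

  #include <stdio.h>
  #include "llama.h"

  /* Sketch: print what this build of the library supports.
   * The deprecated llama_mmap_supported()/llama_mlock_supported() still
   * compile but now emit warnings pointing at the llama_supports_* names. */
  static void print_capabilities(void) {
      printf("max devices : %zu\n", llama_max_devices());
      printf("mmap        : %s\n", llama_supports_mmap()        ? "yes" : "no");
      printf("mlock       : %s\n", llama_supports_mlock()       ? "yes" : "no");
      printf("gpu offload : %s\n", llama_supports_gpu_offload() ? "yes" : "no");
  }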
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.3
+  version: 0.12.4
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-03 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -45,6 +45,8 @@ files:
 - vendor/tmp/llama.cpp/ggml-cuda.cu
 - vendor/tmp/llama.cpp/ggml-cuda.h
 - vendor/tmp/llama.cpp/ggml-impl.h
+- vendor/tmp/llama.cpp/ggml-kompute.cpp
+- vendor/tmp/llama.cpp/ggml-kompute.h
 - vendor/tmp/llama.cpp/ggml-metal.h
 - vendor/tmp/llama.cpp/ggml-metal.m
 - vendor/tmp/llama.cpp/ggml-metal.metal
@@ -54,6 +56,11 @@ files:
 - vendor/tmp/llama.cpp/ggml-opencl.h
 - vendor/tmp/llama.cpp/ggml-quants.c
 - vendor/tmp/llama.cpp/ggml-quants.h
+- vendor/tmp/llama.cpp/ggml-sycl.cpp
+- vendor/tmp/llama.cpp/ggml-sycl.h
+- vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
+- vendor/tmp/llama.cpp/ggml-vulkan.cpp
+- vendor/tmp/llama.cpp/ggml-vulkan.h
 - vendor/tmp/llama.cpp/ggml.c
 - vendor/tmp/llama.cpp/ggml.h
 - vendor/tmp/llama.cpp/llama.cpp