llama_cpp 0.12.3 → 0.12.5
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/llama_cpp/extconf.rb +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +22 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/sig/llama_cpp.rbs +4 -2
- data/vendor/tmp/llama.cpp/Makefile +160 -56
- data/vendor/tmp/llama.cpp/ggml-alloc.c +85 -25
- data/vendor/tmp/llama.cpp/ggml-backend-impl.h +6 -0
- data/vendor/tmp/llama.cpp/ggml-backend.c +115 -3
- data/vendor/tmp/llama.cpp/ggml-backend.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-cuda.cu +688 -270
- data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.cpp +1990 -0
- data/vendor/tmp/llama.cpp/ggml-kompute.h +46 -0
- data/vendor/tmp/llama.cpp/ggml-metal.h +3 -0
- data/vendor/tmp/llama.cpp/ggml-metal.m +121 -86
- data/vendor/tmp/llama.cpp/ggml-metal.metal +303 -4
- data/vendor/tmp/llama.cpp/ggml-opencl.cpp +95 -3
- data/vendor/tmp/llama.cpp/ggml-opencl.h +1 -0
- data/vendor/tmp/llama.cpp/ggml-quants.c +745 -109
- data/vendor/tmp/llama.cpp/ggml-quants.h +81 -56
- data/vendor/tmp/llama.cpp/ggml-sycl.cpp +15296 -0
- data/vendor/tmp/llama.cpp/ggml-sycl.h +29 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp +51714 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.cpp +5726 -0
- data/vendor/tmp/llama.cpp/ggml-vulkan.h +39 -0
- data/vendor/tmp/llama.cpp/ggml.c +356 -60
- data/vendor/tmp/llama.cpp/ggml.h +7 -1
- data/vendor/tmp/llama.cpp/llama.cpp +876 -118
- data/vendor/tmp/llama.cpp/llama.h +12 -16
- metadata +9 -2
data/vendor/tmp/llama.cpp/llama.h CHANGED
@@ -3,12 +3,7 @@
 
 #include "ggml.h"
 #include "ggml-backend.h"
-#ifdef GGML_USE_CUBLAS
-#include "ggml-cuda.h"
-#define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
-#else
-#define LLAMA_MAX_DEVICES 1
-#endif // GGML_USE_CUBLAS
+
 #include <stddef.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -46,11 +41,6 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 4
 
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
-// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
-#define LLAMA_SUPPORTS_GPU_OFFLOAD
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -108,6 +98,7 @@ extern "C" {
         LLAMA_FTYPE_MOSTLY_IQ2_XS  = 20, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q2_K_S  = 21, // except 1d tensors
         LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+        LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 
         LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
     };
@@ -196,7 +187,7 @@
         // LLAMA_SPLIT_LAYER: ignored
         int32_t main_gpu;
 
-        // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+        // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         const float * tensor_split;
 
         // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
@@ -222,7 +213,7 @@
         uint32_t n_batch;         // prompt processing maximum batch size
         uint32_t n_threads;       // number of threads to use for generation
         uint32_t n_threads_batch; // number of threads to use for batch processing
-        int8_t  rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
+        int32_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 
         // ref: https://github.com/ggerganov/llama.cpp/pull/2054
         float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -333,9 +324,14 @@
 
     LLAMA_API int64_t llama_time_us(void);
 
-    LLAMA_API int  llama_max_devices    (void);
-    LLAMA_API bool llama_mmap_supported (void);
-    LLAMA_API bool llama_mlock_supported(void);
+    LLAMA_API size_t llama_max_devices(void);
+
+    LLAMA_API bool llama_supports_mmap       (void);
+    LLAMA_API bool llama_supports_mlock      (void);
+    LLAMA_API bool llama_supports_gpu_offload(void);
+
+    LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
+    LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 
     LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
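The llama.h changes above replace compile-time feature macros (LLAMA_MAX_DEVICES, LLAMA_SUPPORTS_GPU_OFFLOAD) with runtime queries. A minimal C sketch of how calling code might adapt — illustrative only, not taken from the gem; the helper name make_tensor_split is hypothetical:

#include <stdlib.h>
#include "llama.h"

// Size tensor_split with llama_max_devices() instead of the removed
// LLAMA_MAX_DEVICES macro, and probe GPU offload support at runtime
// instead of the removed LLAMA_SUPPORTS_GPU_OFFLOAD macro.
static float * make_tensor_split(void) {
    size_t  n_devices = llama_max_devices();
    float * split     = calloc(n_devices, sizeof(float));
    if (split != NULL && n_devices > 0 && llama_supports_gpu_offload()) {
        split[0] = 1.0f; // offload the whole model to the first device
    }
    return split; // caller frees; intended for the tensor_split field shown above
}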
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: llama_cpp
 version: !ruby/object:Gem::Version
-  version: 0.12.3
+  version: 0.12.5
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-09 00:00:00.000000000 Z
 dependencies: []
 description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
 email:
@@ -45,6 +45,8 @@ files:
 - vendor/tmp/llama.cpp/ggml-cuda.cu
 - vendor/tmp/llama.cpp/ggml-cuda.h
 - vendor/tmp/llama.cpp/ggml-impl.h
+- vendor/tmp/llama.cpp/ggml-kompute.cpp
+- vendor/tmp/llama.cpp/ggml-kompute.h
 - vendor/tmp/llama.cpp/ggml-metal.h
 - vendor/tmp/llama.cpp/ggml-metal.m
 - vendor/tmp/llama.cpp/ggml-metal.metal
@@ -54,6 +56,11 @@ files:
 - vendor/tmp/llama.cpp/ggml-opencl.h
 - vendor/tmp/llama.cpp/ggml-quants.c
 - vendor/tmp/llama.cpp/ggml-quants.h
+- vendor/tmp/llama.cpp/ggml-sycl.cpp
+- vendor/tmp/llama.cpp/ggml-sycl.h
+- vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
+- vendor/tmp/llama.cpp/ggml-vulkan.cpp
+- vendor/tmp/llama.cpp/ggml-vulkan.h
 - vendor/tmp/llama.cpp/ggml.c
 - vendor/tmp/llama.cpp/ggml.h
 - vendor/tmp/llama.cpp/llama.cpp