llama_cpp 0.12.3 → 0.12.5

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
@@ -3,12 +3,7 @@
 
  #include "ggml.h"
  #include "ggml-backend.h"
- #ifdef GGML_USE_CUBLAS
- #include "ggml-cuda.h"
- #define LLAMA_MAX_DEVICES GGML_CUDA_MAX_DEVICES
- #else
- #define LLAMA_MAX_DEVICES 1
- #endif // GGML_USE_CUBLAS
+
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>
@@ -46,11 +41,6 @@
  #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
  #define LLAMA_SESSION_VERSION 4
 
- #if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
- // Defined when llama.cpp is compiled with support for offloading model layers to GPU.
- #define LLAMA_SUPPORTS_GPU_OFFLOAD
- #endif
-
  #ifdef __cplusplus
  extern "C" {
  #endif
@@ -108,6 +98,7 @@ extern "C" {
  LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
  LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 
  LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
  };
@@ -196,7 +187,7 @@ extern "C" {
  // LLAMA_SPLIT_LAYER: ignored
  int32_t main_gpu;
 
- // proportion of the model (layers or rows) to offload to each GPU, size: LLAMA_MAX_DEVICES
+ // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
  const float * tensor_split;
 
  // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
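
The updated comment ties the expected length of tensor_split to the runtime llama_max_devices() call rather than the removed LLAMA_MAX_DEVICES macro. A minimal sketch of filling that field under the new convention follows; llama_model_default_params() and the even split across devices are illustrative assumptions, not part of this diff.

#include <stdlib.h>
#include "llama.h"

/* Sketch: size tensor_split from llama_max_devices() instead of the removed
 * LLAMA_MAX_DEVICES macro. Assumes llama_model_default_params() from llama.h;
 * the even split across devices is for illustration only. */
static struct llama_model_params make_params(float **split_out) {
    size_t n = llama_max_devices();          /* returns size_t in the bundled llama.cpp as of 0.12.5 */
    float *split = calloc(n, sizeof(float));
    for (size_t i = 0; i < n; i++)
        split[i] = 1.0f / (float)n;          /* offload an equal share to each device */

    struct llama_model_params params = llama_model_default_params();
    params.main_gpu     = 0;                 /* primary device */
    params.tensor_split = split;             /* must point at llama_max_devices() floats */
    *split_out = split;                      /* caller frees after the model has been loaded */
    return params;
}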
@@ -222,7 +213,7 @@ extern "C" {
  uint32_t n_batch; // prompt processing maximum batch size
  uint32_t n_threads; // number of threads to use for generation
  uint32_t n_threads_batch; // number of threads to use for batch processing
- int8_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
+ int32_t rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
 
  // ref: https://github.com/ggerganov/llama.cpp/pull/2054
  float rope_freq_base; // RoPE base frequency, 0 = from model
@@ -333,9 +324,14 @@ extern "C" {
 
  LLAMA_API int64_t llama_time_us(void);
 
- LLAMA_API int32_t llama_max_devices(void);
- LLAMA_API bool llama_mmap_supported (void);
- LLAMA_API bool llama_mlock_supported(void);
+ LLAMA_API size_t llama_max_devices(void);
+
+ LLAMA_API bool llama_supports_mmap (void);
+ LLAMA_API bool llama_supports_mlock (void);
+ LLAMA_API bool llama_supports_gpu_offload(void);
+
+ LLAMA_API DEPRECATED(bool llama_mmap_supported (void), "use llama_supports_mmap() instead");
+ LLAMA_API DEPRECATED(bool llama_mlock_supported(void), "use llama_supports_mlock() instead");
 
  LLAMA_API const struct llama_model * llama_get_model(const struct llama_context * ctx);
 
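With the compile-time LLAMA_SUPPORTS_GPU_OFFLOAD and LLAMA_MAX_DEVICES macros gone, capability checks move to runtime. A short sketch using only the functions introduced above; the printf reporting is illustrative.

#include <stdio.h>
#include "llama.h"

/* Sketch: query build capabilities at runtime via the new llama_supports_*()
 * functions instead of the removed preprocessor macros. */
int main(void) {
    printf("devices:     %zu\n", llama_max_devices());        /* now size_t, not int32_t */
    printf("mmap:        %s\n", llama_supports_mmap()  ? "yes" : "no");
    printf("mlock:       %s\n", llama_supports_mlock() ? "yes" : "no");
    printf("gpu offload: %s\n", llama_supports_gpu_offload() ? "yes" : "no");

    /* llama_mmap_supported() / llama_mlock_supported() still compile but are
     * marked DEPRECATED in favor of the llama_supports_*() variants. */
    return 0;
}
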
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: llama_cpp
  version: !ruby/object:Gem::Version
- version: 0.12.3
+ version: 0.12.5
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-01-27 00:00:00.000000000 Z
+ date: 2024-02-09 00:00:00.000000000 Z
  dependencies: []
  description: llama_cpp.rb provides Ruby bindings for the llama.cpp.
  email:
@@ -45,6 +45,8 @@ files:
  - vendor/tmp/llama.cpp/ggml-cuda.cu
  - vendor/tmp/llama.cpp/ggml-cuda.h
  - vendor/tmp/llama.cpp/ggml-impl.h
+ - vendor/tmp/llama.cpp/ggml-kompute.cpp
+ - vendor/tmp/llama.cpp/ggml-kompute.h
  - vendor/tmp/llama.cpp/ggml-metal.h
  - vendor/tmp/llama.cpp/ggml-metal.m
  - vendor/tmp/llama.cpp/ggml-metal.metal
@@ -54,6 +56,11 @@ files:
  - vendor/tmp/llama.cpp/ggml-opencl.h
  - vendor/tmp/llama.cpp/ggml-quants.c
  - vendor/tmp/llama.cpp/ggml-quants.h
+ - vendor/tmp/llama.cpp/ggml-sycl.cpp
+ - vendor/tmp/llama.cpp/ggml-sycl.h
+ - vendor/tmp/llama.cpp/ggml-vulkan-shaders.hpp
+ - vendor/tmp/llama.cpp/ggml-vulkan.cpp
+ - vendor/tmp/llama.cpp/ggml-vulkan.h
  - vendor/tmp/llama.cpp/ggml.c
  - vendor/tmp/llama.cpp/ggml.h
  - vendor/tmp/llama.cpp/llama.cpp