llama_cpp 0.12.1 → 0.12.2
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +8 -0
 - data/ext/llama_cpp/llama_cpp.cpp +64 -0
 - data/lib/llama_cpp/version.rb +2 -2
 - data/sig/llama_cpp.rbs +7 -0
 - data/vendor/tmp/llama.cpp/Makefile +0 -9
 - data/vendor/tmp/llama.cpp/ggml-alloc.c +28 -6
 - data/vendor/tmp/llama.cpp/ggml-alloc.h +3 -1
 - data/vendor/tmp/llama.cpp/ggml-backend-impl.h +36 -36
 - data/vendor/tmp/llama.cpp/ggml-backend.c +510 -263
 - data/vendor/tmp/llama.cpp/ggml-backend.h +42 -32
 - data/vendor/tmp/llama.cpp/ggml-cuda.cu +692 -476
 - data/vendor/tmp/llama.cpp/ggml-cuda.h +18 -30
 - data/vendor/tmp/llama.cpp/ggml-impl.h +2 -0
 - data/vendor/tmp/llama.cpp/ggml-metal.h +4 -56
 - data/vendor/tmp/llama.cpp/ggml-metal.m +1860 -2073
 - data/vendor/tmp/llama.cpp/ggml-opencl.cpp +321 -14
 - data/vendor/tmp/llama.cpp/ggml-opencl.h +13 -3
 - data/vendor/tmp/llama.cpp/ggml-quants.c +1638 -134
 - data/vendor/tmp/llama.cpp/ggml-quants.h +15 -4
 - data/vendor/tmp/llama.cpp/ggml.c +142 -64
 - data/vendor/tmp/llama.cpp/ggml.h +47 -29
 - data/vendor/tmp/llama.cpp/llama.cpp +1219 -1615
 - data/vendor/tmp/llama.cpp/llama.h +30 -8
 - metadata +2 -2
 
| 
         @@ -17,22 +17,31 @@ extern "C" { 
     | 
|
| 
       17 
17 
     | 
    
         
             
                //
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
                // buffer type
         
     | 
| 
       20 
     | 
    
         
            -
                GGML_API  
     | 
| 
       21 
     | 
    
         
            -
                GGML_API  
     | 
| 
       22 
     | 
    
         
            -
                GGML_API 
     | 
| 
       23 
     | 
    
         
            -
                GGML_API  
     | 
| 
       24 
     | 
    
         
            -
                GGML_API 
     | 
| 
      
 20 
     | 
    
         
            +
                GGML_API           const char *          ggml_backend_buft_name            (ggml_backend_buffer_type_t buft);
         
     | 
| 
      
 21 
     | 
    
         
            +
                GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_buft_alloc_buffer    (ggml_backend_buffer_type_t buft, size_t size);
         
     | 
| 
      
 22 
     | 
    
         
            +
                GGML_API           size_t                ggml_backend_buft_get_alignment   (ggml_backend_buffer_type_t buft);
         
     | 
| 
      
 23 
     | 
    
         
            +
                GGML_API GGML_CALL size_t                ggml_backend_buft_get_alloc_size  (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor);
         
     | 
| 
      
 24 
     | 
    
         
            +
                GGML_API           bool                  ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend);
         
     | 
| 
      
 25 
     | 
    
         
            +
                GGML_API           bool                  ggml_backend_buft_is_host         (ggml_backend_buffer_type_t buft);
         
     | 
| 
       25 
26 
     | 
    
         | 
| 
       26 
27 
     | 
    
         
             
                // buffer
         
     | 
| 
       27 
     | 
    
         
            -
                 
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
                 
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
                GGML_API  
     | 
| 
       33 
     | 
    
         
            -
                GGML_API 
     | 
| 
       34 
     | 
    
         
            -
                GGML_API  
     | 
| 
       35 
     | 
    
         
            -
                GGML_API 
     | 
| 
      
 28 
     | 
    
         
            +
                enum ggml_backend_buffer_usage {
         
     | 
| 
      
 29 
     | 
    
         
            +
                    GGML_BACKEND_BUFFER_USAGE_ANY = 0,
         
     | 
| 
      
 30 
     | 
    
         
            +
                    GGML_BACKEND_BUFFER_USAGE_WEIGHTS = 1,
         
     | 
| 
      
 31 
     | 
    
         
            +
                };
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                GGML_API           const char *               ggml_backend_buffer_name          (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 34 
     | 
    
         
            +
                GGML_API           void                       ggml_backend_buffer_free          (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 35 
     | 
    
         
            +
                GGML_API           void *                     ggml_backend_buffer_get_base      (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 36 
     | 
    
         
            +
                GGML_API           size_t                     ggml_backend_buffer_get_size      (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 37 
     | 
    
         
            +
                GGML_API GGML_CALL void                       ggml_backend_buffer_init_tensor   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
         
     | 
| 
      
 38 
     | 
    
         
            +
                GGML_API           size_t                     ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 39 
     | 
    
         
            +
                GGML_API           size_t                     ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
         
     | 
| 
      
 40 
     | 
    
         
            +
                GGML_API           void                       ggml_backend_buffer_clear         (ggml_backend_buffer_t buffer, uint8_t value);
         
     | 
| 
      
 41 
     | 
    
         
            +
                GGML_API           bool                       ggml_backend_buffer_is_host       (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 42 
     | 
    
         
            +
                GGML_API           void                       ggml_backend_buffer_set_usage     (ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage);
         
     | 
| 
      
 43 
     | 
    
         
            +
                GGML_API           ggml_backend_buffer_type_t ggml_backend_buffer_get_type      (ggml_backend_buffer_t buffer);
         
     | 
| 
      
 44 
     | 
    
         
            +
                GGML_API           void                       ggml_backend_buffer_reset         (ggml_backend_buffer_t buffer);
         
     | 
| 
       36 
45 
     | 
    
         | 
| 
       37 
46 
     | 
    
         
             
                //
         
     | 
| 
       38 
47 
     | 
    
         
             
                // Backend
         
     | 
| 
         @@ -49,8 +58,8 @@ extern "C" { 
     | 
|
| 
       49 
58 
     | 
    
         
             
                GGML_API void ggml_backend_tensor_set_async(ggml_backend_t backend,       struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         
     | 
| 
       50 
59 
     | 
    
         
             
                GGML_API void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
         
     | 
| 
       51 
60 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
                GGML_API void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         
     | 
| 
       53 
     | 
    
         
            -
                GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
         
     | 
| 
      
 61 
     | 
    
         
            +
                GGML_API GGML_CALL void ggml_backend_tensor_set(      struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
         
     | 
| 
      
 62 
     | 
    
         
            +
                GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor,       void * data, size_t offset, size_t size);
         
     | 
| 
       54 
63 
     | 
    
         | 
| 
       55 
64 
     | 
    
         
             
                GGML_API void ggml_backend_synchronize(ggml_backend_t backend);
         
     | 
| 
       56 
65 
     | 
    
         | 
| 
         @@ -71,13 +80,13 @@ extern "C" { 
     | 
|
| 
       71 
80 
     | 
    
         | 
| 
       72 
81 
     | 
    
         
             
                GGML_API ggml_backend_t ggml_backend_cpu_init(void);
         
     | 
| 
       73 
82 
     | 
    
         | 
| 
       74 
     | 
    
         
            -
                GGML_API bool ggml_backend_is_cpu(ggml_backend_t backend);
         
     | 
| 
       75 
     | 
    
         
            -
                GGML_API 
     | 
| 
      
 83 
     | 
    
         
            +
                GGML_API GGML_CALL bool ggml_backend_is_cpu           (ggml_backend_t backend);
         
     | 
| 
      
 84 
     | 
    
         
            +
                GGML_API           void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
         
     | 
| 
       76 
85 
     | 
    
         | 
| 
       77 
86 
     | 
    
         
             
                // Create a backend buffer from an existing pointer
         
     | 
| 
       78 
     | 
    
         
            -
                GGML_API ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
         
     | 
| 
      
 87 
     | 
    
         
            +
                GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
         
     | 
| 
       79 
88 
     | 
    
         | 
| 
       80 
     | 
    
         
            -
                GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
         
     | 
| 
      
 89 
     | 
    
         
            +
                GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void);
         
     | 
| 
       81 
90 
     | 
    
         | 
| 
       82 
91 
     | 
    
         
             
            #ifdef GGML_USE_CPU_HBM
         
     | 
| 
       83 
92 
     | 
    
         
             
                GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
         
     | 
| 
         @@ -140,23 +149,24 @@ extern "C" { 
     | 
|
| 
       140 
149 
     | 
    
         
             
                typedef struct ggml_backend_sched * ggml_backend_sched_t;
         
     | 
| 
       141 
150 
     | 
    
         | 
| 
       142 
151 
     | 
    
         
             
                // Initialize a backend scheduler
         
     | 
| 
       143 
     | 
    
         
            -
                GGML_API ggml_backend_sched_t 
     | 
| 
       144 
     | 
    
         
            -
             
     | 
| 
       145 
     | 
    
         
            -
                GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
         
     | 
| 
       146 
     | 
    
         
            -
             
     | 
| 
      
 152 
     | 
    
         
            +
                GGML_API ggml_backend_sched_t  ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
         
     | 
| 
      
 153 
     | 
    
         
            +
                GGML_API void                  ggml_backend_sched_free(ggml_backend_sched_t sched);
         
     | 
| 
       147 
154 
     | 
    
         
             
                // Initialize backend buffers from a measure graph
         
     | 
| 
       148 
     | 
    
         
            -
                GGML_API void 
     | 
| 
      
 155 
     | 
    
         
            +
                GGML_API void                  ggml_backend_sched_init_measure(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
         
     | 
| 
      
 156 
     | 
    
         
            +
                // Get the number of splits of the last graph
         
     | 
| 
      
 157 
     | 
    
         
            +
                GGML_API int                   ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
         
     | 
| 
       149 
158 
     | 
    
         | 
| 
       150 
159 
     | 
    
         
             
                GGML_API ggml_tallocr_t        ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend);
         
     | 
| 
       151 
160 
     | 
    
         
             
                GGML_API ggml_backend_buffer_t ggml_backend_sched_get_buffer (ggml_backend_sched_t sched, ggml_backend_t backend);
         
     | 
| 
       152 
161 
     | 
    
         | 
| 
       153 
     | 
    
         
            -
                GGML_API void 
     | 
| 
      
 162 
     | 
    
         
            +
                GGML_API void                  ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
         
     | 
| 
      
 163 
     | 
    
         
            +
                GGML_API ggml_backend_t        ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
         
     | 
| 
       154 
164 
     | 
    
         | 
| 
       155 
     | 
    
         
            -
                // Allocate  
     | 
| 
       156 
     | 
    
         
            -
                GGML_API void 
     | 
| 
       157 
     | 
    
         
            -
                        ggml_backend_sched_t sched,
         
     | 
| 
       158 
     | 
    
         
            -
                        struct ggml_cgraph * graph);
         
     | 
| 
      
 165 
     | 
    
         
            +
                // Allocate and compute graph on the backend scheduler
         
     | 
| 
      
 166 
     | 
    
         
            +
                GGML_API void                  ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
         
     | 
| 
       159 
167 
     | 
    
         | 
| 
      
 168 
     | 
    
         
            +
                // Reset all assignments and allocators - must be called before using the sched allocators to allocate inputs
         
     | 
| 
      
 169 
     | 
    
         
            +
                GGML_API void                  ggml_backend_sched_reset(ggml_backend_sched_t sched);
         
     | 
| 
       160 
170 
     | 
    
         | 
| 
       161 
171 
     | 
    
         
             
                //
         
     | 
| 
       162 
172 
     | 
    
         
             
                // Utils
         
     | 
| 
         @@ -173,10 +183,10 @@ extern "C" { 
     | 
|
| 
       173 
183 
     | 
    
         
             
                GGML_API struct ggml_backend_graph_copy ggml_backend_graph_copy(ggml_backend_t backend, struct ggml_cgraph * graph);
         
     | 
| 
       174 
184 
     | 
    
         
             
                GGML_API void                           ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy);
         
     | 
| 
       175 
185 
     | 
    
         | 
| 
       176 
     | 
    
         
            -
                typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
         
     | 
| 
      
 186 
     | 
    
         
            +
                typedef bool (*GGML_CALL ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data);
         
     | 
| 
       177 
187 
     | 
    
         | 
| 
       178 
188 
     | 
    
         
             
                // Compare the output of two backends
         
     | 
| 
       179 
     | 
    
         
            -
                GGML_API  
     | 
| 
      
 189 
     | 
    
         
            +
                GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data);
         
     | 
| 
       180 
190 
     | 
    
         | 
| 
       181 
191 
     | 
    
         
             
                // Tensor initialization
         
     | 
| 
       182 
192 
     | 
    
         
             
                GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
         
     |