llama_cpp 0.3.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -0
- data/ext/llama_cpp/extconf.rb +9 -0
- data/ext/llama_cpp/llama_cpp.cpp +302 -112
- data/ext/llama_cpp/src/ggml-cuda.cu +677 -118
- data/ext/llama_cpp/src/ggml-metal.h +5 -1
- data/ext/llama_cpp/src/ggml-metal.m +65 -45
- data/ext/llama_cpp/src/ggml-metal.metal +610 -484
- data/ext/llama_cpp/src/ggml-mpi.c +216 -0
- data/ext/llama_cpp/src/ggml-mpi.h +39 -0
- data/ext/llama_cpp/src/ggml.c +1146 -812
- data/ext/llama_cpp/src/ggml.h +77 -19
- data/ext/llama_cpp/src/k_quants.h +8 -0
- data/ext/llama_cpp/src/llama.cpp +289 -104
- data/ext/llama_cpp/src/llama.h +46 -3
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +2 -1
- data/sig/llama_cpp.rbs +14 -1
- metadata +4 -2
 
    
        data/ext/llama_cpp/llama_cpp.cpp
    CHANGED
    
    | 
         @@ -17,9 +17,9 @@ public: 
     | 
|
| 
       17 
17 
     | 
    
         
             
                data.id = 0;
         
     | 
| 
       18 
18 
     | 
    
         
             
                data.logit = 0.0;
         
     | 
| 
       19 
19 
     | 
    
         
             
                data.p = 0.0;
         
     | 
| 
       20 
     | 
    
         
            -
              } 
     | 
| 
      
 20 
     | 
    
         
            +
              }
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
              ~LLaMATokenDataWrapper(){} 
     | 
| 
      
 22 
     | 
    
         
            +
              ~LLaMATokenDataWrapper() {}
         
     | 
| 
       23 
23 
     | 
    
         
             
            };
         
     | 
| 
       24 
24 
     | 
    
         | 
| 
       25 
25 
     | 
    
         
             
            class RbLLaMATokenData {
         
     | 
| 
         @@ -28,22 +28,22 @@ public: 
     | 
|
| 
       28 
28 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = (LLaMATokenDataWrapper*)ruby_xmalloc(sizeof(LLaMATokenDataWrapper));
         
     | 
| 
       29 
29 
     | 
    
         
             
                new (ptr) LLaMATokenDataWrapper();
         
     | 
| 
       30 
30 
     | 
    
         
             
                return TypedData_Wrap_Struct(self, &llama_token_data_type, ptr);
         
     | 
| 
       31 
     | 
    
         
            -
              } 
     | 
| 
      
 31 
     | 
    
         
            +
              }
         
     | 
| 
       32 
32 
     | 
    
         | 
| 
       33 
33 
     | 
    
         
             
              static void llama_token_data_free(void* ptr) {
         
     | 
| 
       34 
34 
     | 
    
         
             
                ((LLaMATokenDataWrapper*)ptr)->~LLaMATokenDataWrapper();
         
     | 
| 
       35 
35 
     | 
    
         
             
                ruby_xfree(ptr);
         
     | 
| 
       36 
     | 
    
         
            -
              } 
     | 
| 
      
 36 
     | 
    
         
            +
              }
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
38 
     | 
    
         
             
              static size_t llama_token_data_size(const void* ptr) {
         
     | 
| 
       39 
39 
     | 
    
         
             
                return sizeof(*((LLaMATokenDataWrapper*)ptr));
         
     | 
| 
       40 
     | 
    
         
            -
              } 
     | 
| 
      
 40 
     | 
    
         
            +
              }
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
       42 
42 
     | 
    
         
             
              static LLaMATokenDataWrapper* get_llama_token_data(VALUE self) {
         
     | 
| 
       43 
43 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr;
         
     | 
| 
       44 
44 
     | 
    
         
             
                TypedData_Get_Struct(self, LLaMATokenDataWrapper, &llama_token_data_type, ptr);
         
     | 
| 
       45 
45 
     | 
    
         
             
                return ptr;
         
     | 
| 
       46 
     | 
    
         
            -
              } 
     | 
| 
      
 46 
     | 
    
         
            +
              }
         
     | 
| 
       47 
47 
     | 
    
         | 
| 
       48 
48 
     | 
    
         
             
              static void define_class(VALUE outer) {
         
     | 
| 
       49 
49 
     | 
    
         
             
                rb_cLLaMATokenData = rb_define_class_under(outer, "TokenData", rb_cObject);
         
     | 
| 
         @@ -95,36 +95,36 @@ private: 
     | 
|
| 
       95 
95 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       96 
96 
     | 
    
         
             
                ptr->data.id = NUM2INT(id);
         
     | 
| 
       97 
97 
     | 
    
         
             
                return INT2NUM(ptr->data.id);
         
     | 
| 
       98 
     | 
    
         
            -
              } 
     | 
| 
      
 98 
     | 
    
         
            +
              }
         
     | 
| 
       99 
99 
     | 
    
         | 
| 
       100 
100 
     | 
    
         
             
              static VALUE _llama_token_data_get_id(VALUE self) {
         
     | 
| 
       101 
101 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       102 
102 
     | 
    
         
             
                return INT2NUM(ptr->data.id);
         
     | 
| 
       103 
     | 
    
         
            -
              } 
     | 
| 
      
 103 
     | 
    
         
            +
              }
         
     | 
| 
       104 
104 
     | 
    
         | 
| 
       105 
105 
     | 
    
         
             
              // logit
         
     | 
| 
       106 
106 
     | 
    
         
             
              static VALUE _llama_token_data_set_logit(VALUE self, VALUE logit) {
         
     | 
| 
       107 
107 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       108 
108 
     | 
    
         
             
                ptr->data.logit = NUM2DBL(logit);
         
     | 
| 
       109 
109 
     | 
    
         
             
                return DBL2NUM(ptr->data.logit);
         
     | 
| 
       110 
     | 
    
         
            -
              } 
     | 
| 
      
 110 
     | 
    
         
            +
              }
         
     | 
| 
       111 
111 
     | 
    
         | 
| 
       112 
112 
     | 
    
         
             
              static VALUE _llama_token_data_get_logit(VALUE self) {
         
     | 
| 
       113 
113 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       114 
114 
     | 
    
         
             
                return DBL2NUM(ptr->data.logit);
         
     | 
| 
       115 
     | 
    
         
            -
              } 
     | 
| 
      
 115 
     | 
    
         
            +
              }
         
     | 
| 
       116 
116 
     | 
    
         | 
| 
       117 
117 
     | 
    
         
             
              // p
         
     | 
| 
       118 
118 
     | 
    
         
             
              static VALUE _llama_token_data_set_p(VALUE self, VALUE p) {
         
     | 
| 
       119 
119 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       120 
120 
     | 
    
         
             
                ptr->data.p = NUM2DBL(p);
         
     | 
| 
       121 
121 
     | 
    
         
             
                return DBL2NUM(ptr->data.p);
         
     | 
| 
       122 
     | 
    
         
            -
              } 
     | 
| 
      
 122 
     | 
    
         
            +
              }
         
     | 
| 
       123 
123 
     | 
    
         | 
| 
       124 
124 
     | 
    
         
             
              static VALUE _llama_token_data_get_p(VALUE self) {
         
     | 
| 
       125 
125 
     | 
    
         
             
                LLaMATokenDataWrapper* ptr = get_llama_token_data(self);
         
     | 
| 
       126 
126 
     | 
    
         
             
                return DBL2NUM(ptr->data.p);
         
     | 
| 
       127 
     | 
    
         
            -
              } 
     | 
| 
      
 127 
     | 
    
         
            +
              }
         
     | 
| 
       128 
128 
     | 
    
         
             
            };
         
     | 
| 
       129 
129 
     | 
    
         | 
| 
       130 
130 
     | 
    
         
             
            const rb_data_type_t RbLLaMATokenData::llama_token_data_type = {
         
     | 
| 
         @@ -145,14 +145,14 @@ public: 
     | 
|
| 
       145 
145 
     | 
    
         
             
                array.data = nullptr;
         
     | 
| 
       146 
146 
     | 
    
         
             
                array.size = 0;
         
     | 
| 
       147 
147 
     | 
    
         
             
                array.sorted = false;
         
     | 
| 
       148 
     | 
    
         
            -
              } 
     | 
| 
      
 148 
     | 
    
         
            +
              }
         
     | 
| 
       149 
149 
     | 
    
         | 
| 
       150 
150 
     | 
    
         
             
              ~LLaMATokenDataArrayWrapper() {
         
     | 
| 
       151 
151 
     | 
    
         
             
                if (array.data) {
         
     | 
| 
       152 
152 
     | 
    
         
             
                  ruby_xfree(array.data);
         
     | 
| 
       153 
153 
     | 
    
         
             
                  array.data = nullptr;
         
     | 
| 
       154 
154 
     | 
    
         
             
                }
         
     | 
| 
       155 
     | 
    
         
            -
              } 
     | 
| 
      
 155 
     | 
    
         
            +
              }
         
     | 
| 
       156 
156 
     | 
    
         
             
            };
         
     | 
| 
       157 
157 
     | 
    
         | 
| 
       158 
158 
     | 
    
         
             
            class RbLLaMATokenDataArray {
         
     | 
| 
         @@ -161,22 +161,22 @@ public: 
     | 
|
| 
       161 
161 
     | 
    
         
             
                LLaMATokenDataArrayWrapper* ptr = (LLaMATokenDataArrayWrapper*)ruby_xmalloc(sizeof(LLaMATokenDataArrayWrapper));
         
     | 
| 
       162 
162 
     | 
    
         
             
                new (ptr) LLaMATokenDataArrayWrapper();
         
     | 
| 
       163 
163 
     | 
    
         
             
                return TypedData_Wrap_Struct(self, &llama_token_data_array_type, ptr);
         
     | 
| 
       164 
     | 
    
         
            -
              } 
     | 
| 
      
 164 
     | 
    
         
            +
              }
         
     | 
| 
       165 
165 
     | 
    
         | 
| 
       166 
166 
     | 
    
         
             
              static void llama_token_data_array_free(void* ptr) {
         
     | 
| 
       167 
167 
     | 
    
         
             
                ((LLaMATokenDataArrayWrapper*)ptr)->~LLaMATokenDataArrayWrapper();
         
     | 
| 
       168 
168 
     | 
    
         
             
                ruby_xfree(ptr);
         
     | 
| 
       169 
     | 
    
         
            -
              } 
     | 
| 
      
 169 
     | 
    
         
            +
              }
         
     | 
| 
       170 
170 
     | 
    
         | 
| 
       171 
171 
     | 
    
         
             
              static size_t llama_token_data_array_size(const void* ptr) {
         
     | 
| 
       172 
172 
     | 
    
         
             
                return sizeof(*((LLaMATokenDataArrayWrapper*)ptr));
         
     | 
| 
       173 
     | 
    
         
            -
              } 
     | 
| 
      
 173 
     | 
    
         
            +
              }
         
     | 
| 
       174 
174 
     | 
    
         | 
| 
       175 
175 
     | 
    
         
             
              static LLaMATokenDataArrayWrapper* get_llama_token_data_array(VALUE self) {
         
     | 
| 
       176 
176 
     | 
    
         
             
                LLaMATokenDataArrayWrapper* ptr;
         
     | 
| 
       177 
177 
     | 
    
         
             
                TypedData_Get_Struct(self, LLaMATokenDataArrayWrapper, &llama_token_data_array_type, ptr);
         
     | 
| 
       178 
178 
     | 
    
         
             
                return ptr;
         
     | 
| 
       179 
     | 
    
         
            -
              } 
     | 
| 
      
 179 
     | 
    
         
            +
              }
         
     | 
| 
       180 
180 
     | 
    
         | 
| 
       181 
181 
     | 
    
         
             
              static void define_class(VALUE outer) {
         
     | 
| 
       182 
182 
     | 
    
         
             
                rb_cLLaMATokenDataArray = rb_define_class_under(outer, "TokenDataArray", rb_cObject);
         
     | 
| 
         @@ -184,7 +184,7 @@ public: 
     | 
|
| 
       184 
184 
     | 
    
         
             
                rb_define_method(rb_cLLaMATokenDataArray, "initialize", RUBY_METHOD_FUNC(_llama_token_data_array_init), -1);
         
     | 
| 
       185 
185 
     | 
    
         
             
                rb_define_method(rb_cLLaMATokenDataArray, "size", RUBY_METHOD_FUNC(_llama_token_data_array_get_size), 0);
         
     | 
| 
       186 
186 
     | 
    
         
             
                rb_define_method(rb_cLLaMATokenDataArray, "sorted", RUBY_METHOD_FUNC(_llama_token_data_array_get_sorted), 0);
         
     | 
| 
       187 
     | 
    
         
            -
              } 
     | 
| 
      
 187 
     | 
    
         
            +
              }
         
     | 
| 
       188 
188 
     | 
    
         | 
| 
       189 
189 
     | 
    
         
             
            private:
         
     | 
| 
       190 
190 
     | 
    
         
             
              static const rb_data_type_t llama_token_data_array_type;
         
     | 
| 
         @@ -233,17 +233,17 @@ private: 
     | 
|
| 
       233 
233 
     | 
    
         
             
                ptr->array.sorted = kw_values[0] == Qtrue;
         
     | 
| 
       234 
234 
     | 
    
         | 
| 
       235 
235 
     | 
    
         
             
                return self;
         
     | 
| 
       236 
     | 
    
         
            -
              } 
     | 
| 
      
 236 
     | 
    
         
            +
              }
         
     | 
| 
       237 
237 
     | 
    
         | 
| 
       238 
238 
     | 
    
         
             
              static VALUE _llama_token_data_array_get_size(VALUE self) {
         
     | 
| 
       239 
239 
     | 
    
         
             
                LLaMATokenDataArrayWrapper* ptr = get_llama_token_data_array(self);
         
     | 
| 
       240 
240 
     | 
    
         
             
                return SIZET2NUM(ptr->array.size);
         
     | 
| 
       241 
     | 
    
         
            -
              } 
     | 
| 
      
 241 
     | 
    
         
            +
              }
         
     | 
| 
       242 
242 
     | 
    
         | 
| 
       243 
243 
     | 
    
         
             
              static VALUE _llama_token_data_array_get_sorted(VALUE self) {
         
     | 
| 
       244 
244 
     | 
    
         
             
                LLaMATokenDataArrayWrapper* ptr = get_llama_token_data_array(self);
         
     | 
| 
       245 
245 
     | 
    
         
             
                return ptr->array.sorted ? Qtrue : Qfalse;
         
     | 
| 
       246 
     | 
    
         
            -
              } 
     | 
| 
      
 246 
     | 
    
         
            +
              }
         
     | 
| 
       247 
247 
     | 
    
         
             
            };
         
     | 
| 
       248 
248 
     | 
    
         | 
| 
       249 
249 
     | 
    
         
             
            const rb_data_type_t RbLLaMATokenDataArray::llama_token_data_array_type = {
         
     | 
| 
         @@ -260,9 +260,9 @@ class LLaMATimingsWrapper { 
     | 
|
| 
       260 
260 
     | 
    
         
             
            public:
         
     | 
| 
       261 
261 
     | 
    
         
             
              struct llama_timings timings;
         
     | 
| 
       262 
262 
     | 
    
         | 
| 
       263 
     | 
    
         
            -
              LLaMATimingsWrapper(){} 
     | 
| 
      
 263 
     | 
    
         
            +
              LLaMATimingsWrapper() {}
         
     | 
| 
       264 
264 
     | 
    
         | 
| 
       265 
     | 
    
         
            -
              ~LLaMATimingsWrapper(){} 
     | 
| 
      
 265 
     | 
    
         
            +
              ~LLaMATimingsWrapper() {}
         
     | 
| 
       266 
266 
     | 
    
         
             
            };
         
     | 
| 
       267 
267 
     | 
    
         | 
| 
       268 
268 
     | 
    
         
             
            class RbLLaMATimings {
         
     | 
| 
         @@ -365,9 +365,9 @@ class LLaMAContextParamsWrapper { 
     | 
|
| 
       365 
365 
     | 
    
         
             
            public:
         
     | 
| 
       366 
366 
     | 
    
         
             
              struct llama_context_params params;
         
     | 
| 
       367 
367 
     | 
    
         | 
| 
       368 
     | 
    
         
            -
              LLaMAContextParamsWrapper() : params(llama_context_default_params()){} 
     | 
| 
      
 368 
     | 
    
         
            +
              LLaMAContextParamsWrapper() : params(llama_context_default_params()) {}
         
     | 
| 
       369 
369 
     | 
    
         | 
| 
       370 
     | 
    
         
            -
              ~LLaMAContextParamsWrapper(){} 
     | 
| 
      
 370 
     | 
    
         
            +
              ~LLaMAContextParamsWrapper() {}
         
     | 
| 
       371 
371 
     | 
    
         
             
            };
         
     | 
| 
       372 
372 
     | 
    
         | 
| 
       373 
373 
     | 
    
         
             
            class RbLLaMAContextParams {
         
     | 
| 
         @@ -376,22 +376,22 @@ public: 
     | 
|
| 
       376 
376 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = (LLaMAContextParamsWrapper*)ruby_xmalloc(sizeof(LLaMAContextParamsWrapper));
         
     | 
| 
       377 
377 
     | 
    
         
             
                new (ptr) LLaMAContextParamsWrapper();
         
     | 
| 
       378 
378 
     | 
    
         
             
                return TypedData_Wrap_Struct(self, &llama_context_params_type, ptr);
         
     | 
| 
       379 
     | 
    
         
            -
              } 
     | 
| 
      
 379 
     | 
    
         
            +
              }
         
     | 
| 
       380 
380 
     | 
    
         | 
| 
       381 
381 
     | 
    
         
             
              static void llama_context_params_free(void* ptr) {
         
     | 
| 
       382 
382 
     | 
    
         
             
                ((LLaMAContextParamsWrapper*)ptr)->~LLaMAContextParamsWrapper();
         
     | 
| 
       383 
383 
     | 
    
         
             
                ruby_xfree(ptr);
         
     | 
| 
       384 
     | 
    
         
            -
              } 
     | 
| 
      
 384 
     | 
    
         
            +
              }
         
     | 
| 
       385 
385 
     | 
    
         | 
| 
       386 
386 
     | 
    
         
             
              static size_t llama_context_params_size(const void* ptr) {
         
     | 
| 
       387 
387 
     | 
    
         
             
                return sizeof(*((LLaMAContextParamsWrapper*)ptr));
         
     | 
| 
       388 
     | 
    
         
            -
              } 
     | 
| 
      
 388 
     | 
    
         
            +
              }
         
     | 
| 
       389 
389 
     | 
    
         | 
| 
       390 
390 
     | 
    
         
             
              static LLaMAContextParamsWrapper* get_llama_context_params(VALUE self) {
         
     | 
| 
       391 
391 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr;
         
     | 
| 
       392 
392 
     | 
    
         
             
                TypedData_Get_Struct(self, LLaMAContextParamsWrapper, &llama_context_params_type, ptr);
         
     | 
| 
       393 
393 
     | 
    
         
             
                return ptr;
         
     | 
| 
       394 
     | 
    
         
            -
              } 
     | 
| 
      
 394 
     | 
    
         
            +
              }
         
     | 
| 
       395 
395 
     | 
    
         | 
| 
       396 
396 
     | 
    
         
             
              static void define_class(VALUE outer) {
         
     | 
| 
       397 
397 
     | 
    
         
             
                rb_cLLaMAContextParams = rb_define_class_under(outer, "ContextParams", rb_cObject);
         
     | 
| 
         @@ -406,6 +406,10 @@ public: 
     | 
|
| 
       406 
406 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "main_gpu=", RUBY_METHOD_FUNC(_llama_context_params_set_main_gpu), 1);
         
     | 
| 
       407 
407 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "main_gpu", RUBY_METHOD_FUNC(_llama_context_params_get_main_gpu), 0);
         
     | 
| 
       408 
408 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "tensor_split", RUBY_METHOD_FUNC(_llama_context_params_get_tensor_split), 0);
         
     | 
| 
      
 409 
     | 
    
         
            +
                rb_define_method(rb_cLLaMAContextParams, "rope_freq_base=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_base), 1);
         
     | 
| 
      
 410 
     | 
    
         
            +
                rb_define_method(rb_cLLaMAContextParams, "rope_freq_base", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_base), 0);
         
     | 
| 
      
 411 
     | 
    
         
            +
                rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale=", RUBY_METHOD_FUNC(_llama_context_params_set_rope_freq_scale), 1);
         
     | 
| 
      
 412 
     | 
    
         
            +
                rb_define_method(rb_cLLaMAContextParams, "rope_freq_scale", RUBY_METHOD_FUNC(_llama_context_params_get_rope_freq_scale), 0);
         
     | 
| 
       409 
413 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "low_vram=", RUBY_METHOD_FUNC(_llama_context_params_set_low_vram), 1);
         
     | 
| 
       410 
414 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "low_vram", RUBY_METHOD_FUNC(_llama_context_params_get_low_vram), 0);
         
     | 
| 
       411 
415 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "seed=", RUBY_METHOD_FUNC(_llama_context_params_set_seed), 1);
         
     | 
| 
         @@ -422,7 +426,7 @@ public: 
     | 
|
| 
       422 
426 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "use_mlock", RUBY_METHOD_FUNC(_llama_context_params_get_use_mlock), 0);
         
     | 
| 
       423 
427 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "embedding=", RUBY_METHOD_FUNC(_llama_context_params_set_embedding), 1);
         
     | 
| 
       424 
428 
     | 
    
         
             
                rb_define_method(rb_cLLaMAContextParams, "embedding", RUBY_METHOD_FUNC(_llama_context_params_get_embedding), 0);
         
     | 
| 
       425 
     | 
    
         
            -
              } 
     | 
| 
      
 429 
     | 
    
         
            +
              }
         
     | 
| 
       426 
430 
     | 
    
         | 
| 
       427 
431 
     | 
    
         
             
            private:
         
     | 
| 
       428 
432 
     | 
    
         
             
              static const rb_data_type_t llama_context_params_type;
         
     | 
| 
         @@ -431,55 +435,55 @@ private: 
     | 
|
| 
       431 
435 
     | 
    
         
             
              //   LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       432 
436 
     | 
    
         
             
              //   new (ptr) LLaMAContextParamsWrapper();
         
     | 
| 
       433 
437 
     | 
    
         
             
              //   return self;
         
     | 
| 
       434 
     | 
    
         
            -
              // } 
     | 
| 
      
 438 
     | 
    
         
            +
              // }
         
     | 
| 
       435 
439 
     | 
    
         | 
| 
       436 
440 
     | 
    
         
             
              // n_ctx
         
     | 
| 
       437 
441 
     | 
    
         
             
              static VALUE _llama_context_params_set_n_ctx(VALUE self, VALUE n_ctx) {
         
     | 
| 
       438 
442 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       439 
443 
     | 
    
         
             
                ptr->params.n_ctx = NUM2INT(n_ctx);
         
     | 
| 
       440 
444 
     | 
    
         
             
                return INT2NUM(ptr->params.n_ctx);
         
     | 
| 
       441 
     | 
    
         
            -
              } 
     | 
| 
      
 445 
     | 
    
         
            +
              }
         
     | 
| 
       442 
446 
     | 
    
         | 
| 
       443 
447 
     | 
    
         
             
              static VALUE _llama_context_params_get_n_ctx(VALUE self) {
         
     | 
| 
       444 
448 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       445 
449 
     | 
    
         
             
                return INT2NUM(ptr->params.n_ctx);
         
     | 
| 
       446 
     | 
    
         
            -
              } 
     | 
| 
      
 450 
     | 
    
         
            +
              }
         
     | 
| 
       447 
451 
     | 
    
         | 
| 
       448 
452 
     | 
    
         
             
              // n_batch
         
     | 
| 
       449 
453 
     | 
    
         
             
              static VALUE _llama_context_params_set_n_batch(VALUE self, VALUE n_batch) {
         
     | 
| 
       450 
454 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       451 
455 
     | 
    
         
             
                ptr->params.n_batch = NUM2INT(n_batch);
         
     | 
| 
       452 
456 
     | 
    
         
             
                return INT2NUM(ptr->params.n_batch);
         
     | 
| 
       453 
     | 
    
         
            -
              } 
     | 
| 
      
 457 
     | 
    
         
            +
              }
         
     | 
| 
       454 
458 
     | 
    
         | 
| 
       455 
459 
     | 
    
         
             
              static VALUE _llama_context_params_get_n_batch(VALUE self) {
         
     | 
| 
       456 
460 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       457 
461 
     | 
    
         
             
                return INT2NUM(ptr->params.n_batch);
         
     | 
| 
       458 
     | 
    
         
            -
              } 
     | 
| 
      
 462 
     | 
    
         
            +
              }
         
     | 
| 
       459 
463 
     | 
    
         | 
| 
       460 
464 
     | 
    
         
             
              // n_gpu_layers
         
     | 
| 
       461 
465 
     | 
    
         
             
              static VALUE _llama_context_params_set_n_gpu_layers(VALUE self, VALUE n_gpu_layers) {
         
     | 
| 
       462 
466 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       463 
467 
     | 
    
         
             
                ptr->params.n_gpu_layers = NUM2INT(n_gpu_layers);
         
     | 
| 
       464 
468 
     | 
    
         
             
                return INT2NUM(ptr->params.n_gpu_layers);
         
     | 
| 
       465 
     | 
    
         
            -
              } 
     | 
| 
      
 469 
     | 
    
         
            +
              }
         
     | 
| 
       466 
470 
     | 
    
         | 
| 
       467 
471 
     | 
    
         
             
              static VALUE _llama_context_params_get_n_gpu_layers(VALUE self) {
         
     | 
| 
       468 
472 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       469 
473 
     | 
    
         
             
                return INT2NUM(ptr->params.n_gpu_layers);
         
     | 
| 
       470 
     | 
    
         
            -
              } 
     | 
| 
      
 474 
     | 
    
         
            +
              }
         
     | 
| 
       471 
475 
     | 
    
         | 
| 
       472 
476 
     | 
    
         
             
              // main_gpu
         
     | 
| 
       473 
477 
     | 
    
         
             
              static VALUE _llama_context_params_set_main_gpu(VALUE self, VALUE main_gpu) {
         
     | 
| 
       474 
478 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       475 
479 
     | 
    
         
             
                ptr->params.main_gpu = NUM2INT(main_gpu);
         
     | 
| 
       476 
480 
     | 
    
         
             
                return INT2NUM(ptr->params.main_gpu);
         
     | 
| 
       477 
     | 
    
         
            -
              } 
     | 
| 
      
 481 
     | 
    
         
            +
              }
         
     | 
| 
       478 
482 
     | 
    
         | 
| 
       479 
483 
     | 
    
         
             
              static VALUE _llama_context_params_get_main_gpu(VALUE self) {
         
     | 
| 
       480 
484 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       481 
485 
     | 
    
         
             
                return INT2NUM(ptr->params.main_gpu);
         
     | 
| 
       482 
     | 
    
         
            -
              } 
     | 
| 
      
 486 
     | 
    
         
            +
              }
         
     | 
| 
       483 
487 
     | 
    
         | 
| 
       484 
488 
     | 
    
         
             
              // tensor_split
         
     | 
| 
       485 
489 
     | 
    
         
             
              static VALUE _llama_context_params_get_tensor_split(VALUE self) {
         
     | 
| 
         @@ -492,19 +496,43 @@ private: 
     | 
|
| 
       492 
496 
     | 
    
         
             
                  rb_ary_store(ret, i, DBL2NUM(ptr->params.tensor_split[i]));
         
     | 
| 
       493 
497 
     | 
    
         
             
                }
         
     | 
| 
       494 
498 
     | 
    
         
             
                return ret;
         
     | 
| 
       495 
     | 
    
         
            -
              } 
     | 
| 
      
 499 
     | 
    
         
            +
              }
         
     | 
| 
      
 500 
     | 
    
         
            +
             
     | 
| 
      
 501 
     | 
    
         
            +
              // rope_freq_base
         
     | 
| 
      
 502 
     | 
    
         
            +
              static VALUE _llama_context_params_set_rope_freq_base(VALUE self, VALUE rope_freq_base) {
         
     | 
| 
      
 503 
     | 
    
         
            +
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
      
 504 
     | 
    
         
            +
                ptr->params.rope_freq_base = NUM2DBL(rope_freq_base);
         
     | 
| 
      
 505 
     | 
    
         
            +
                return DBL2NUM(ptr->params.rope_freq_base);
         
     | 
| 
      
 506 
     | 
    
         
            +
              }
         
     | 
| 
      
 507 
     | 
    
         
            +
             
     | 
| 
      
 508 
     | 
    
         
            +
              static VALUE _llama_context_params_get_rope_freq_base(VALUE self) {
         
     | 
| 
      
 509 
     | 
    
         
            +
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
      
 510 
     | 
    
         
            +
                return DBL2NUM(ptr->params.rope_freq_base);
         
     | 
| 
      
 511 
     | 
    
         
            +
              }
         
     | 
| 
      
 512 
     | 
    
         
            +
             
     | 
| 
      
 513 
     | 
    
         
            +
              // rope_freq_scale
         
     | 
| 
      
 514 
     | 
    
         
            +
              static VALUE _llama_context_params_set_rope_freq_scale(VALUE self, VALUE rope_freq_scale) {
         
     | 
| 
      
 515 
     | 
    
         
            +
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
      
 516 
     | 
    
         
            +
                ptr->params.rope_freq_scale = NUM2DBL(rope_freq_scale);
         
     | 
| 
      
 517 
     | 
    
         
            +
                return DBL2NUM(ptr->params.rope_freq_scale);
         
     | 
| 
      
 518 
     | 
    
         
            +
              }
         
     | 
| 
      
 519 
     | 
    
         
            +
             
     | 
| 
      
 520 
     | 
    
         
            +
              static VALUE _llama_context_params_get_rope_freq_scale(VALUE self) {
         
     | 
| 
      
 521 
     | 
    
         
            +
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
      
 522 
     | 
    
         
            +
                return DBL2NUM(ptr->params.rope_freq_scale);
         
     | 
| 
      
 523 
     | 
    
         
            +
              }
         
     | 
| 
       496 
524 
     | 
    
         | 
| 
       497 
525 
     | 
    
         
             
              // low_vram
         
     | 
| 
       498 
526 
     | 
    
         
             
              static VALUE _llama_context_params_set_low_vram(VALUE self, VALUE low_vram) {
         
     | 
| 
       499 
527 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       500 
528 
     | 
    
         
             
                ptr->params.low_vram = low_vram == Qtrue ? true : false;
         
     | 
| 
       501 
529 
     | 
    
         
             
                return ptr->params.low_vram ? Qtrue : Qfalse;
         
     | 
| 
       502 
     | 
    
         
            -
              } 
     | 
| 
      
 530 
     | 
    
         
            +
              }
         
     | 
| 
       503 
531 
     | 
    
         | 
| 
       504 
532 
     | 
    
         
             
              static VALUE _llama_context_params_get_low_vram(VALUE self) {
         
     | 
| 
       505 
533 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       506 
534 
     | 
    
         
             
                return ptr->params.low_vram ? Qtrue : Qfalse;
         
     | 
| 
       507 
     | 
    
         
            -
              } 
     | 
| 
      
 535 
     | 
    
         
            +
              }
         
     | 
| 
       508 
536 
     | 
    
         | 
| 
       509 
537 
     | 
    
         
             
              // seed
         
     | 
| 
       510 
538 
     | 
    
         
             
              static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
         
     | 
| 
         @@ -515,84 +543,84 @@ private: 
     | 
|
| 
       515 
543 
     | 
    
         
             
                }
         
     | 
| 
       516 
544 
     | 
    
         
             
                ptr->params.seed = NUM2INT(seed);
         
     | 
| 
       517 
545 
     | 
    
         
             
                return INT2NUM(ptr->params.seed);
         
     | 
| 
       518 
     | 
    
         
            -
              } 
     | 
| 
      
 546 
     | 
    
         
            +
              }
         
     | 
| 
       519 
547 
     | 
    
         | 
| 
       520 
548 
     | 
    
         
             
              static VALUE _llama_context_params_get_seed(VALUE self) {
         
     | 
| 
       521 
549 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       522 
550 
     | 
    
         
             
                return INT2NUM(ptr->params.seed);
         
     | 
| 
       523 
     | 
    
         
            -
              } 
     | 
| 
      
 551 
     | 
    
         
            +
              }
         
     | 
| 
       524 
552 
     | 
    
         | 
| 
       525 
553 
     | 
    
         
             
              // f16_kv
         
     | 
| 
       526 
554 
     | 
    
         
             
              static VALUE _llama_context_params_set_f16_kv(VALUE self, VALUE f16_kv) {
         
     | 
| 
       527 
555 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       528 
556 
     | 
    
         
             
                ptr->params.f16_kv = f16_kv == Qtrue ? true : false;
         
     | 
| 
       529 
557 
     | 
    
         
             
                return ptr->params.f16_kv ? Qtrue : Qfalse;
         
     | 
| 
       530 
     | 
    
         
            -
              } 
     | 
| 
      
 558 
     | 
    
         
            +
              }
         
     | 
| 
       531 
559 
     | 
    
         | 
| 
       532 
560 
     | 
    
         
             
              static VALUE _llama_context_params_get_f16_kv(VALUE self) {
         
     | 
| 
       533 
561 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       534 
562 
     | 
    
         
             
                return ptr->params.f16_kv ? Qtrue : Qfalse;
         
     | 
| 
       535 
     | 
    
         
            -
              } 
     | 
| 
      
 563 
     | 
    
         
            +
              }
         
     | 
| 
       536 
564 
     | 
    
         | 
| 
       537 
565 
     | 
    
         
             
              // logits_all
         
     | 
| 
       538 
566 
     | 
    
         
             
              static VALUE _llama_context_params_set_logits_all(VALUE self, VALUE logits_all) {
         
     | 
| 
       539 
567 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       540 
568 
     | 
    
         
             
                ptr->params.logits_all = logits_all == Qtrue ? true : false;
         
     | 
| 
       541 
569 
     | 
    
         
             
                return ptr->params.logits_all ? Qtrue : Qfalse;
         
     | 
| 
       542 
     | 
    
         
            -
              } 
     | 
| 
      
 570 
     | 
    
         
            +
              }
         
     | 
| 
       543 
571 
     | 
    
         | 
| 
       544 
572 
     | 
    
         
             
              static VALUE _llama_context_params_get_logits_all(VALUE self) {
         
     | 
| 
       545 
573 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       546 
574 
     | 
    
         
             
                return ptr->params.logits_all ? Qtrue : Qfalse;
         
     | 
| 
       547 
     | 
    
         
            -
              } 
     | 
| 
      
 575 
     | 
    
         
            +
              }
         
     | 
| 
       548 
576 
     | 
    
         | 
| 
       549 
577 
     | 
    
         
             
              // vocab_only
         
     | 
| 
       550 
578 
     | 
    
         
             
              static VALUE _llama_context_params_set_vocab_only(VALUE self, VALUE vocab_only) {
         
     | 
| 
       551 
579 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       552 
580 
     | 
    
         
             
                ptr->params.vocab_only = vocab_only == Qtrue ? true : false;
         
     | 
| 
       553 
581 
     | 
    
         
             
                return ptr->params.vocab_only ? Qtrue : Qfalse;
         
     | 
| 
       554 
     | 
    
         
            -
              } 
     | 
| 
      
 582 
     | 
    
         
            +
              }
         
     | 
| 
       555 
583 
     | 
    
         | 
| 
       556 
584 
     | 
    
         
             
              static VALUE _llama_context_params_get_vocab_only(VALUE self) {
         
     | 
| 
       557 
585 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       558 
586 
     | 
    
         
             
                return ptr->params.vocab_only ? Qtrue : Qfalse;
         
     | 
| 
       559 
     | 
    
         
            -
              } 
     | 
| 
      
 587 
     | 
    
         
            +
              }
         
     | 
| 
       560 
588 
     | 
    
         | 
| 
       561 
589 
     | 
    
         
             
              // use_mmap
         
     | 
| 
       562 
590 
     | 
    
         
             
              static VALUE _llama_context_params_set_use_mmap(VALUE self, VALUE use_mmap) {
         
     | 
| 
       563 
591 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       564 
592 
     | 
    
         
             
                ptr->params.use_mmap = use_mmap == Qtrue ? true : false;
         
     | 
| 
       565 
593 
     | 
    
         
             
                return ptr->params.use_mmap ? Qtrue : Qfalse;
         
     | 
| 
       566 
     | 
    
         
            -
              } 
     | 
| 
      
 594 
     | 
    
         
            +
              }
         
     | 
| 
       567 
595 
     | 
    
         | 
| 
       568 
596 
     | 
    
         
             
              static VALUE _llama_context_params_get_use_mmap(VALUE self) {
         
     | 
| 
       569 
597 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       570 
598 
     | 
    
         
             
                return ptr->params.use_mmap ? Qtrue : Qfalse;
         
     | 
| 
       571 
     | 
    
         
            -
              } 
     | 
| 
      
 599 
     | 
    
         
            +
              }
         
     | 
| 
       572 
600 
     | 
    
         | 
| 
       573 
601 
     | 
    
         
             
              // use_mlock
         
     | 
| 
       574 
602 
     | 
    
         
             
              static VALUE _llama_context_params_set_use_mlock(VALUE self, VALUE use_mlock) {
         
     | 
| 
       575 
603 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       576 
604 
     | 
    
         
             
                ptr->params.use_mlock = use_mlock == Qtrue ? true : false;
         
     | 
| 
       577 
605 
     | 
    
         
             
                return ptr->params.use_mlock ? Qtrue : Qfalse;
         
     | 
| 
       578 
     | 
    
         
            -
              } 
     | 
| 
      
 606 
     | 
    
         
            +
              }
         
     | 
| 
       579 
607 
     | 
    
         | 
| 
       580 
608 
     | 
    
         
             
              static VALUE _llama_context_params_get_use_mlock(VALUE self) {
         
     | 
| 
       581 
609 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       582 
610 
     | 
    
         
             
                return ptr->params.use_mlock ? Qtrue : Qfalse;
         
     | 
| 
       583 
     | 
    
         
            -
              } 
     | 
| 
      
 611 
     | 
    
         
            +
              }
         
     | 
| 
       584 
612 
     | 
    
         | 
| 
       585 
613 
     | 
    
         
             
              // embedding
         
     | 
| 
       586 
614 
     | 
    
         
             
              static VALUE _llama_context_params_set_embedding(VALUE self, VALUE embedding) {
         
     | 
| 
       587 
615 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       588 
616 
     | 
    
         
             
                ptr->params.embedding = embedding == Qtrue ? true : false;
         
     | 
| 
       589 
617 
     | 
    
         
             
                return ptr->params.embedding ? Qtrue : Qfalse;
         
     | 
| 
       590 
     | 
    
         
            -
              } 
     | 
| 
      
 618 
     | 
    
         
            +
              }
         
     | 
| 
       591 
619 
     | 
    
         | 
| 
       592 
620 
     | 
    
         
             
              static VALUE _llama_context_params_get_embedding(VALUE self) {
         
     | 
| 
       593 
621 
     | 
    
         
             
                LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
         
     | 
| 
       594 
622 
     | 
    
         
             
                return ptr->params.embedding ? Qtrue : Qfalse;
         
     | 
| 
       595 
     | 
    
         
            -
              } 
     | 
| 
      
 623 
     | 
    
         
            +
              }
         
     | 
| 
       596 
624 
     | 
    
         
             
            };
         
     | 
| 
       597 
625 
     | 
    
         | 
| 
       598 
626 
     | 
    
         
             
            const rb_data_type_t RbLLaMAContextParams::llama_context_params_type = {
         
     | 
| 
         @@ -609,9 +637,9 @@ class LLaMAModelQuantizeParamsWrapper { 
     | 
|
| 
       609 
637 
     | 
    
         
             
            public:
         
     | 
| 
       610 
638 
     | 
    
         
             
              llama_model_quantize_params params;
         
     | 
| 
       611 
639 
     | 
    
         | 
| 
       612 
     | 
    
         
            -
              LLaMAModelQuantizeParamsWrapper() : params(llama_model_quantize_default_params()){} 
     | 
| 
      
 640 
     | 
    
         
            +
              LLaMAModelQuantizeParamsWrapper() : params(llama_model_quantize_default_params()) {}
         
     | 
| 
       613 
641 
     | 
    
         | 
| 
       614 
     | 
    
         
            -
              ~LLaMAModelQuantizeParamsWrapper(){} 
     | 
| 
      
 642 
     | 
    
         
            +
              ~LLaMAModelQuantizeParamsWrapper() {}
         
     | 
| 
       615 
643 
     | 
    
         
             
            };
         
     | 
| 
       616 
644 
     | 
    
         | 
| 
       617 
645 
     | 
    
         
             
            class RbLLaMAModelQuantizeParams {
         
     | 
| 
         @@ -620,22 +648,22 @@ public: 
     | 
|
| 
       620 
648 
     | 
    
         
             
                LLaMAModelQuantizeParamsWrapper* ptr = (LLaMAModelQuantizeParamsWrapper*)ruby_xmalloc(sizeof(LLaMAModelQuantizeParamsWrapper));
         
     | 
| 
       621 
649 
     | 
    
         
             
                new (ptr) LLaMAModelQuantizeParamsWrapper();
         
     | 
| 
       622 
650 
     | 
    
         
             
                return TypedData_Wrap_Struct(self, &llama_model_quantize_params_type, ptr);
         
     | 
| 
       623 
     | 
    
         
            -
              } 
     | 
| 
      
 651 
     | 
    
         
            +
              }
         
     | 
| 
       624 
652 
     | 
    
         | 
| 
       625 
653 
     | 
    
         
             
              static void llama_model_quantize_params_free(void* ptr) {
         
     | 
| 
       626 
654 
     | 
    
         
             
                ((LLaMAModelQuantizeParamsWrapper*)ptr)->~LLaMAModelQuantizeParamsWrapper();
         
     | 
| 
       627 
655 
     | 
    
         
             
                ruby_xfree(ptr);
         
     | 
| 
       628 
     | 
    
         
            -
              } 
     | 
| 
      
 656 
     | 
    
         
            +
              }
         
     | 
| 
       629 
657 
     | 
    
         | 
| 
       630 
658 
     | 
    
         
             
              static size_t llama_model_quantize_params_size(const void* ptr) {
         
     | 
| 
       631 
659 
     | 
    
         
             
                return sizeof(*((LLaMAModelQuantizeParamsWrapper*)ptr));
         
     | 
| 
       632 
     | 
    
         
            -
              } 
     | 
| 
      
 660 
     | 
    
         
            +
              }
         
     | 
| 
       633 
661 
     | 
    
         | 
| 
       634 
662 
     | 
    
         
             
              static LLaMAModelQuantizeParamsWrapper* get_llama_model_quantize_params(VALUE self) {
         
     | 
| 
       635 
663 
     | 
    
         
             
                LLaMAModelQuantizeParamsWrapper* ptr;
         
     | 
| 
       636 
664 
     | 
    
         
             
                TypedData_Get_Struct(self, LLaMAModelQuantizeParamsWrapper, &llama_model_quantize_params_type, ptr);
         
     | 
| 
       637 
665 
     | 
    
         
             
                return ptr;
         
     | 
| 
       638 
     | 
    
         
            -
              } 
     | 
| 
      
 666 
     | 
    
         
            +
              }
         
     | 
| 
       639 
667 
     | 
    
         | 
| 
       640 
668 
     | 
    
         
             
              static void define_class(VALUE outer) {
         
     | 
| 
       641 
669 
     | 
    
         
             
                rb_cLLaMAModelQuantizeParams = rb_define_class_under(outer, "ModelQuantizeParams", rb_cObject);
         
     | 
| 
         @@ -648,7 +676,7 @@ public: 
     | 
|
| 
       648 
676 
     | 
    
         
             
    rb_define_method(rb_cLLaMAModelQuantizeParams, "allow_requantize", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_allow_requantize), 0);
    rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor=", RUBY_METHOD_FUNC(_llama_model_quantize_params_set_quantize_output_tensor), 1);
    rb_define_method(rb_cLLaMAModelQuantizeParams, "quantize_output_tensor", RUBY_METHOD_FUNC(_llama_model_quantize_params_get_quantize_output_tensor), 0);
-  } 
+  }
 
 private:
   static const rb_data_type_t llama_model_quantize_params_type;
@@ -658,24 +686,24 @@ private:
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     ptr->params.nthread = NUM2INT(n_thread);
     return INT2NUM(ptr->params.nthread);
-  } 
+  }
 
   static VALUE _llama_model_quantize_params_get_n_thread(VALUE self) {
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return INT2NUM(ptr->params.nthread);
-  } 
+  }
 
   // ftype
   static VALUE _llama_model_quantize_params_set_ftype(VALUE self, VALUE ftype) {
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     ptr->params.ftype = static_cast<enum llama_ftype>(NUM2INT(ftype));
     return INT2NUM(ptr->params.ftype);
-  } 
+  }
 
   static VALUE _llama_model_quantize_params_get_ftype(VALUE self) {
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return INT2NUM(ptr->params.ftype);
-  } 
+  }
 
   // allow_requantize
   static VALUE _llama_model_quantize_params_set_allow_requantize(VALUE self, VALUE allow_requantize) {
@@ -686,12 +714,12 @@ private:
       ptr->params.allow_requantize = true;
     }
     return ptr->params.allow_requantize ? Qtrue : Qfalse;
-  } 
+  }
 
   static VALUE _llama_model_quantize_params_get_allow_requantize(VALUE self) {
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.allow_requantize ? Qtrue : Qfalse;
-  } 
+  }
 
   // quantize_output_tensor
   static VALUE _llama_model_quantize_params_set_quantize_output_tensor(VALUE self, VALUE quantize_output_tensor) {
@@ -702,12 +730,12 @@ private:
       ptr->params.quantize_output_tensor = true;
     }
     return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
-  } 
+  }
 
   static VALUE _llama_model_quantize_params_get_quantize_output_tensor(VALUE self) {
     LLaMAModelQuantizeParamsWrapper* ptr = get_llama_model_quantize_params(self);
     return ptr->params.quantize_output_tensor ? Qtrue : Qfalse;
-  } 
+  }
 };
 
 const rb_data_type_t RbLLaMAModelQuantizeParams::llama_model_quantize_params_type = {
@@ -724,13 +752,13 @@ class LLaMAModelWrapper {
 public:
   struct llama_model* model;
 
-  LLaMAModelWrapper() : model(NULL){} 
+  LLaMAModelWrapper() : model(NULL) {}
 
   ~LLaMAModelWrapper() {
     if (model != NULL) {
       llama_free_model(model);
     }
-  } 
+  }
 };
 
 class RbLLaMAModel {
@@ -764,6 +792,12 @@ public:
     rb_define_method(rb_cLLaMAModel, "free", RUBY_METHOD_FUNC(_llama_model_free), 0);
     rb_define_method(rb_cLLaMAModel, "load", RUBY_METHOD_FUNC(_llama_model_load), -1);
     rb_define_method(rb_cLLaMAModel, "apply_lora_from_file", RUBY_METHOD_FUNC(_llama_model_apply_lora_from_file), -1);
+    rb_define_method(rb_cLLaMAModel, "n_vocab", RUBY_METHOD_FUNC(_llama_model_get_n_vocab_from_model), 0);
+    rb_define_method(rb_cLLaMAModel, "n_ctx", RUBY_METHOD_FUNC(_llama_model_get_n_ctx_from_model), 0);
+    rb_define_method(rb_cLLaMAModel, "n_embd", RUBY_METHOD_FUNC(_llama_model_get_n_embd_from_model), 0);
+    rb_define_method(rb_cLLaMAModel, "vocab", RUBY_METHOD_FUNC(_llama_model_get_vocab_from_model), -1);
+    rb_define_method(rb_cLLaMAModel, "token_to_str", RUBY_METHOD_FUNC(_llama_model_token_to_str_with_model), 1);
+    rb_define_method(rb_cLLaMAModel, "tokenize", RUBY_METHOD_FUNC(_llama_model_tokenize_with_model), -1);
   }
 
 private:
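The six `rb_define_method` lines added above expose model-level introspection and tokenization directly on `LLaMACpp::Model`, so a context is no longer needed just to inspect vocabulary size or tokenize text. A minimal Ruby sketch of how these new methods might be called; the model path is a placeholder and the `model_path:`/`params:` constructor keywords are assumed from the gem's existing API, not from this diff:

require 'llama_cpp'

params = LLaMACpp::ContextParams.new
model  = LLaMACpp::Model.new(model_path: '/path/to/ggml-model.bin', params: params)  # hypothetical path

model.n_vocab                                          # vocabulary size read from the model itself
model.n_ctx                                            # training context length
model.n_embd                                           # embedding width
model.tokenize(text: 'Hello, World.', add_bos: true)   # => Array of Integer token ids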
@@ -907,7 +941,110 @@ private:
       return Qnil;
     }
     return Qnil;
-  } 
+  }
+
+  static VALUE _llama_model_get_n_vocab_from_model(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_n_vocab_from_model(ptr->model));
+  }
+
+  static VALUE _llama_model_get_n_ctx_from_model(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_n_ctx_from_model(ptr->model));
+  }
+
+  static VALUE _llama_model_get_n_embd_from_model(VALUE self) {
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    return INT2NUM(llama_n_embd_from_model(ptr->model));
+  }
+
+  static VALUE _llama_model_get_vocab_from_model(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[1] = { rb_intern("capacity") };
+    VALUE kw_values[1] = { Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 0, kw_values);
+
+    if (!RB_INTEGER_TYPE_P(kw_values[0])) {
+      rb_raise(rb_eArgError, "capacity must be an integer");
+      return Qnil;
+    }
+
+    const int capacity = NUM2INT(kw_values[0]);
+
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const int n = std::min(capacity, llama_n_vocab_from_model(ptr->model));
+    const char** vocabs = ALLOCA_N(const char*, n);
+    float* scores = ALLOCA_N(float, n);
+
+    llama_get_vocab_from_model(ptr->model, vocabs, scores, capacity);
+
+    VALUE vocabs_ary = rb_ary_new();
+    VALUE scores_ary = rb_ary_new();
+
+    for (int i = 0; i < n; i++) {
+      rb_ary_push(vocabs_ary, rb_str_new_cstr(vocabs[i]));
+      rb_ary_push(scores_ary, DBL2NUM(scores[i]));
+    }
+
+    VALUE ret = rb_ary_new3(2, vocabs_ary, scores_ary);
+
+    return ret;
+  }
+
+  static VALUE _llama_model_token_to_str_with_model(VALUE self, VALUE token_) {
+    if (!RB_INTEGER_TYPE_P(token_)) {
+      rb_raise(rb_eArgError, "token must be an integer");
+      return Qnil;
+    }
+    const llama_token token = NUM2INT(token_);
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const char* str = llama_token_to_str_with_model(ptr->model, token);
+    return rb_str_new_cstr(str);
+  }
+
+  static VALUE _llama_model_tokenize_with_model(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[3] = { rb_intern("text"), rb_intern("n_max_tokens"), rb_intern("add_bos") };
+    VALUE kw_values[3] = { Qundef, Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 1, 2, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_STRING)) {
+      rb_raise(rb_eArgError, "text must be a String");
+      return Qnil;
+    }
+    if (kw_values[1] != Qundef && !RB_INTEGER_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "n_max_tokens must be an integer");
+      return Qnil;
+    }
+    if (kw_values[2] != Qundef && (kw_values[2] != Qtrue && kw_values[2] != Qfalse)) {
+      rb_raise(rb_eArgError, "add_bos must be a boolean");
+      return Qnil;
+    }
+
+    VALUE text_ = kw_values[0];
+    std::string text = StringValueCStr(text_);
+    const bool add_bos = kw_values[2] == Qtrue ? true : false;
+    const int n_max_tokens = kw_values[1] != Qundef ? NUM2INT(kw_values[1]) : text.size() + (add_bos ? 1 : 0);
+
+    llama_token* tokens = ALLOCA_N(llama_token, n_max_tokens);
+    LLaMAModelWrapper* ptr = get_llama_model(self);
+    const int n_tokens = llama_tokenize_with_model(ptr->model, text.c_str(), tokens, n_max_tokens, add_bos);
+
+    if (n_tokens < 0) {
+      rb_raise(rb_eRuntimeError, "failed to tokenize. The numebr of tokens (%d) is greater than n_max_tokens.", -n_tokens);
+      return Qnil;
+    }
+
+    VALUE ret = rb_ary_new2(n_tokens);
+    for (int i = 0; i < n_tokens; i++) {
+      rb_ary_store(ret, i, INT2NUM(tokens[i]));
+    }
+
+    RB_GC_GUARD(text_);
+    return ret;
+  }
 };
 
 const rb_data_type_t RbLLaMAModel::llama_model_type = {
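These implementations back the new `Model#vocab`, `Model#token_to_str`, and `Model#tokenize` bindings: `vocab` returns a pair of arrays (token strings and their scores) capped at `capacity:`, and `token_to_str` resolves a single token id against the model rather than a context. A hedged usage sketch, assuming `model` is a loaded `LLaMACpp::Model` as above:

strings, scores = model.vocab(capacity: 16)  # up to 16 vocabulary pieces and their scores
piece = model.token_to_str(0)                # the string piece for token id 0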
@@ -924,13 +1061,13 @@ class LLaMAContextWrapper {
 public:
   struct llama_context* ctx;
 
-  LLaMAContextWrapper() : ctx(NULL){} 
+  LLaMAContextWrapper() : ctx(NULL) {}
 
   ~LLaMAContextWrapper() {
     if (ctx != NULL) {
       llama_free(ctx);
     }
-  } 
+  }
 };
 
 class RbLLaMAContext {
@@ -939,22 +1076,22 @@ public:
     LLaMAContextWrapper* ptr = (LLaMAContextWrapper*)ruby_xmalloc(sizeof(LLaMAContextWrapper));
     new (ptr) LLaMAContextWrapper();
     return TypedData_Wrap_Struct(self, &llama_context_type, ptr);
-  } 
+  }
 
   static void llama_context_free(void* ptr) {
     ((LLaMAContextWrapper*)ptr)->~LLaMAContextWrapper();
     ruby_xfree(ptr);
-  } 
+  }
 
   static size_t llama_context_size(const void* ptr) {
     return sizeof(*((LLaMAContextWrapper*)ptr));
-  } 
+  }
 
   static LLaMAContextWrapper* get_llama_context(VALUE self) {
     LLaMAContextWrapper* ptr;
     TypedData_Get_Struct(self, LLaMAContextWrapper, &llama_context_type, ptr);
     return ptr;
-  } 
+  }
 
   static void define_class(VALUE outer) {
     rb_cLLaMAContext = rb_define_class_under(outer, "Context", rb_cObject);
@@ -980,6 +1117,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "save_session_file", RUBY_METHOD_FUNC(_llama_context_save_session_file), -1);
     rb_define_method(rb_cLLaMAContext, "sample_repetition_penalty", RUBY_METHOD_FUNC(_llama_context_sample_repetition_penalty), -1);
     rb_define_method(rb_cLLaMAContext, "sample_frequency_and_presence_penalties", RUBY_METHOD_FUNC(_llama_context_sample_frequency_and_presence_penalties), -1);
+    rb_define_method(rb_cLLaMAContext, "sample_classifier_free_guidance", RUBY_METHOD_FUNC(_llama_context_sample_classifier_free_guidance), -1);
     rb_define_method(rb_cLLaMAContext, "sample_softmax", RUBY_METHOD_FUNC(_llama_context_sample_softmax), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_k), -1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p", RUBY_METHOD_FUNC(_llama_context_sample_top_p), -1);
@@ -990,7 +1128,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "sample_token_mirostat_v2", RUBY_METHOD_FUNC(_llama_context_sample_token_mirostat_v2), -1);
     rb_define_method(rb_cLLaMAContext, "sample_token_greedy", RUBY_METHOD_FUNC(_llama_context_sample_token_greedy), 1);
     rb_define_method(rb_cLLaMAContext, "sample_token", RUBY_METHOD_FUNC(_llama_context_sample_token), 1);
-  } 
+  }
 
 private:
   static const rb_data_type_t llama_context_type;
@@ -1029,7 +1167,7 @@ private:
     rb_iv_set(self, "@has_evaluated", Qfalse);
 
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_eval(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1084,7 +1222,7 @@ private:
     rb_iv_set(self, "@has_evaluated", Qtrue);
 
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1157,7 +1295,7 @@ private:
     }
     RB_GC_GUARD(fname_);
     return Qtrue;
-  } 
+  }
 
   static VALUE _llama_context_tokenize(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1203,7 +1341,7 @@ private:
 
     RB_GC_GUARD(text_);
     return output;
-  } 
+  }
 
   static VALUE _llama_context_token_to_str(VALUE self, VALUE token_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1214,7 +1352,7 @@ private:
     const llama_token token = NUM2INT(token_);
     const char* str = llama_token_to_str(ptr->ctx, token);
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
-  } 
+  }
 
   static VALUE _llama_context_logits(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1239,7 +1377,7 @@ private:
     }
 
     return output;
-  } 
+  }
 
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1267,7 +1405,7 @@ private:
     }
 
     return output;
-  } 
+  }
 
   static VALUE _llama_context_vocab(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1304,7 +1442,7 @@ private:
     }
 
     return rb_ary_new_from_args(2, ret_strings, ret_scores);
-  } 
+  }
 
   static VALUE _llama_context_n_vocab(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1313,7 +1451,7 @@ private:
       return Qnil;
     }
     return INT2NUM(llama_n_vocab(ptr->ctx));
-  } 
+  }
 
   static VALUE _llama_context_n_ctx(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1322,7 +1460,7 @@ private:
       return Qnil;
     }
     return INT2NUM(llama_n_ctx(ptr->ctx));
-  } 
+  }
 
   static VALUE _llama_context_n_embd(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1331,7 +1469,7 @@ private:
      return Qnil;
     }
     return INT2NUM(llama_n_embd(ptr->ctx));
-  } 
+  }
 
   static VALUE _llama_context_get_timings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1353,7 +1491,7 @@ private:
     }
     llama_print_timings(ptr->ctx);
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_reset_timings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1363,7 +1501,7 @@ private:
     }
     llama_reset_timings(ptr->ctx);
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_kv_cache_token_count(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1372,7 +1510,7 @@ private:
       return Qnil;
     }
     return INT2NUM(llama_get_kv_cache_token_count(ptr->ctx));
-  } 
+  }
 
   static VALUE _llama_context_set_rng_seed(VALUE self, VALUE seed_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
@@ -1387,7 +1525,7 @@ private:
     const uint32_t seed = NUM2INT(seed_);
     llama_set_rng_seed(ptr->ctx, seed);
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_load_session_file(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1525,7 +1663,7 @@ private:
     llama_sample_repetition_penalty(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, penalty);
 
     return Qnil;
-  } 
+  }
 
   static VALUE _llama_context_sample_frequency_and_presence_penalties(int argc, VALUE* argv, VALUE self) {
     VALUE kw_args = Qnil;
@@ -1576,7 +1714,47 @@ private:
     llama_sample_frequency_and_presence_penalties(ctx_ptr->ctx, &(cnd_ptr->array), last_n_tokens_data.data(), last_tokens_size, alpha_frequency, alpha_presence);
 
     return Qnil;
-  } 
+  }
+
+  static VALUE _llama_context_sample_classifier_free_guidance(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[2] = { rb_intern("guidance"), rb_intern("scale") };
+    VALUE kw_values[2] = { Qundef, Qundef };
+    VALUE candidates = Qnil;
+    rb_scan_args(argc, argv, "1:", &candidates, &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 0, kw_values);
+
+    if (!rb_obj_is_kind_of(kw_values[0], rb_cLLaMAContext)) {
+      rb_raise(rb_eArgError, "guidance must be a Context");
+      return Qnil;
+    }
+    if (!RB_FLOAT_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "scale must be a float");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
+    if (ctx_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    LLaMATokenDataArrayWrapper* cnd_ptr = RbLLaMATokenDataArray::get_llama_token_data_array(candidates);
+    if (cnd_ptr->array.data == nullptr) {
+      rb_raise(rb_eRuntimeError, "TokenDataArray is empty");
+      return Qnil;
+    }
+
+    LLaMAContextWrapper* guidance_ptr = get_llama_context(kw_values[0]);
+    if (guidance_ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "guidance context is not initialized");
+      return Qnil;
+    }
+    const float scale = NUM2DBL(kw_values[1]);
+
+    llama_sample_classifier_free_guidance(ctx_ptr->ctx, &(cnd_ptr->array), guidance_ptr->ctx, scale);
+
+    return Qnil;
+  }
 
   static VALUE _llama_context_sample_softmax(VALUE self, VALUE candidates) {
     if (!rb_obj_is_kind_of(candidates, rb_cLLaMATokenDataArray)) {
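`_llama_context_sample_classifier_free_guidance` wires llama.cpp's new classifier-free guidance sampler into `Context#sample_classifier_free_guidance(candidates, guidance:, scale:)`, reweighting the candidate tokens against a second, separately evaluated guidance context. A rough Ruby sketch, assuming `context` and `guidance_context` are two `LLaMACpp::Context` objects that have already evaluated the main and the guidance prompts, and `candidates` is a `LLaMACpp::TokenDataArray` built from `context.logits` as for the other `sample_*` methods:

context.sample_classifier_free_guidance(candidates, guidance: guidance_context, scale: 1.5)
token_id = context.sample_token(candidates)  # pick a token after guidance has adjusted the candidates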
         @@ -1598,7 +1776,7 @@ private: 
     | 
|
| 
       1598 
1776 
     | 
    
         
             
                llama_sample_softmax(ctx_ptr->ctx, &(cnd_ptr->array));
         
     | 
| 
       1599 
1777 
     | 
    
         | 
| 
       1600 
1778 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1601 
     | 
    
         
            -
              } 
     | 
| 
      
 1779 
     | 
    
         
            +
              }
         
     | 
| 
       1602 
1780 
     | 
    
         | 
| 
       1603 
1781 
     | 
    
         
             
              static VALUE _llama_context_sample_top_k(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1604 
1782 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1637,7 +1815,7 @@ private: 
     | 
|
| 
       1637 
1815 
     | 
    
         
             
                llama_sample_top_k(ctx_ptr->ctx, &(cnd_ptr->array), k, min_keep);
         
     | 
| 
       1638 
1816 
     | 
    
         | 
| 
       1639 
1817 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1640 
     | 
    
         
            -
              } 
     | 
| 
      
 1818 
     | 
    
         
            +
              }
         
     | 
| 
       1641 
1819 
     | 
    
         | 
| 
       1642 
1820 
     | 
    
         
             
              static VALUE _llama_context_sample_top_p(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1643 
1821 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1676,7 +1854,7 @@ private: 
     | 
|
| 
       1676 
1854 
     | 
    
         
             
                llama_sample_top_p(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
         
     | 
| 
       1677 
1855 
     | 
    
         | 
| 
       1678 
1856 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1679 
     | 
    
         
            -
              } 
     | 
| 
      
 1857 
     | 
    
         
            +
              }
         
     | 
| 
       1680 
1858 
     | 
    
         | 
| 
       1681 
1859 
     | 
    
         
             
              static VALUE _llama_context_sample_tail_free(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1682 
1860 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1715,7 +1893,7 @@ private: 
     | 
|
| 
       1715 
1893 
     | 
    
         
             
                llama_sample_tail_free(ctx_ptr->ctx, &(cnd_ptr->array), z, min_keep);
         
     | 
| 
       1716 
1894 
     | 
    
         | 
| 
       1717 
1895 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1718 
     | 
    
         
            -
              } 
     | 
| 
      
 1896 
     | 
    
         
            +
              }
         
     | 
| 
       1719 
1897 
     | 
    
         | 
| 
       1720 
1898 
     | 
    
         
             
              static VALUE _llama_context_sample_typical(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1721 
1899 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1754,7 +1932,7 @@ private: 
     | 
|
| 
       1754 
1932 
     | 
    
         
             
                llama_sample_typical(ctx_ptr->ctx, &(cnd_ptr->array), prob, min_keep);
         
     | 
| 
       1755 
1933 
     | 
    
         | 
| 
       1756 
1934 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1757 
     | 
    
         
            -
              } 
     | 
| 
      
 1935 
     | 
    
         
            +
              }
         
     | 
| 
       1758 
1936 
     | 
    
         | 
| 
       1759 
1937 
     | 
    
         
             
              static VALUE _llama_context_sample_temperature(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1760 
1938 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1788,7 +1966,7 @@ private: 
     | 
|
| 
       1788 
1966 
     | 
    
         
             
                llama_sample_temperature(ctx_ptr->ctx, &(cnd_ptr->array), temperature);
         
     | 
| 
       1789 
1967 
     | 
    
         | 
| 
       1790 
1968 
     | 
    
         
             
                return Qnil;
         
     | 
| 
       1791 
     | 
    
         
            -
              } 
     | 
| 
      
 1969 
     | 
    
         
            +
              }
         
     | 
| 
       1792 
1970 
     | 
    
         | 
| 
       1793 
1971 
     | 
    
         
             
              static VALUE _llama_context_sample_token_mirostat(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1794 
1972 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1840,7 +2018,7 @@ private: 
     | 
|
| 
       1840 
2018 
     | 
    
         
             
                rb_ary_store(ret, 0, INT2NUM(id));
         
     | 
| 
       1841 
2019 
     | 
    
         
             
                rb_ary_store(ret, 1, DBL2NUM(mu));
         
     | 
| 
       1842 
2020 
     | 
    
         
             
                return ret;
         
     | 
| 
       1843 
     | 
    
         
            -
              } 
     | 
| 
      
 2021 
     | 
    
         
            +
              }
         
     | 
| 
       1844 
2022 
     | 
    
         | 
| 
       1845 
2023 
     | 
    
         
             
              static VALUE _llama_context_sample_token_mirostat_v2(int argc, VALUE* argv, VALUE self) {
         
     | 
| 
       1846 
2024 
     | 
    
         
             
                VALUE kw_args = Qnil;
         
     | 
| 
         @@ -1887,7 +2065,7 @@ private: 
     | 
|
| 
       1887 
2065 
     | 
    
         
             
                rb_ary_store(ret, 0, INT2NUM(id));
         
     | 
| 
       1888 
2066 
     | 
    
         
             
                rb_ary_store(ret, 1, DBL2NUM(mu));
         
     | 
| 
       1889 
2067 
     | 
    
         
             
                return ret;
         
     | 
| 
       1890 
     | 
    
         
            -
              } 
     | 
| 
      
 2068 
     | 
    
         
            +
              }
         
     | 
| 
       1891 
2069 
     | 
    
         | 
| 
       1892 
2070 
     | 
    
         
             
              static VALUE _llama_context_sample_token_greedy(VALUE self, VALUE candidates) {
         
     | 
| 
       1893 
2071 
     | 
    
         
             
                LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
         
     | 
| 
         @@ -1906,7 +2084,7 @@ private: 
     | 
|
| 
       1906 
2084 
     | 
    
         
             
                }
         
     | 
| 
       1907 
2085 
     | 
    
         
             
                llama_token id = llama_sample_token_greedy(ctx_ptr->ctx, &(cnd_ptr->array));
         
     | 
| 
       1908 
2086 
     | 
    
         
             
                return INT2NUM(id);
         
     | 
| 
       1909 
     | 
    
         
            -
              } 
     | 
| 
      
 2087 
     | 
    
         
            +
              }
         
     | 
| 
       1910 
2088 
     | 
    
         | 
| 
       1911 
2089 
     | 
    
         
             
              static VALUE _llama_context_sample_token(VALUE self, VALUE candidates) {
         
     | 
| 
       1912 
2090 
     | 
    
         
             
                LLaMAContextWrapper* ctx_ptr = get_llama_context(self);
         
     | 
| 
         @@ -1925,7 +2103,7 @@ private: 
     | 
|
| 
       1925 
2103 
     | 
    
         
             
                }
         
     | 
| 
       1926 
2104 
     | 
    
         
             
                llama_token id = llama_sample_token(ctx_ptr->ctx, &(cnd_ptr->array));
         
     | 
| 
       1927 
2105 
     | 
    
         
             
                return INT2NUM(id);
         
     | 
| 
       1928 
     | 
    
         
            -
              } 
     | 
| 
      
 2106 
     | 
    
         
            +
              }
         
     | 
| 
       1929 
2107 
     | 
    
         
             
            };
         
     | 
| 
       1930 
2108 
     | 
    
         | 
| 
       1931 
2109 
     | 
    
         
             
            const rb_data_type_t RbLLaMAContext::llama_context_type = {
         
     | 
@@ -1940,7 +2118,7 @@ const rb_data_type_t RbLLaMAContext::llama_context_type = {

1940 2118 | 
1941 2119 | // module functions
1942 2120 | 
1943      | -static VALUE rb_llama_llama_init_backend(int argc, VALUE* argv, VALUE self) {
     2121 | +static VALUE rb_llama_llama_backend_init(int argc, VALUE* argv, VALUE self) {
1944 2122 |   VALUE kw_args = Qnil;
1945 2123 |   ID kw_table[1] = { rb_intern("numa") };
1946 2124 |   VALUE kw_values[1] = { Qundef };

@@ -1948,7 +2126,13 @@ static VALUE rb_llama_llama_init_backend(int argc, VALUE* argv, VALUE self) {

1948 2126 |   rb_get_kwargs(kw_args, kw_table, 0, 1, kw_values);
1949 2127 | 
1950 2128 |   const bool numa = kw_values[0] == Qundef ? false : (RTEST(kw_values[0]) ? true : false);
1951      | -  llama_init_backend(numa);
     2129 | +  llama_backend_init(numa);
     2130 | +
     2131 | +  return Qnil;
     2132 | +}
     2133 | +
     2134 | +static VALUE rb_llama_llama_backend_free(VALUE self) {
     2135 | +  llama_backend_free();
1952 2136 | 
1953 2137 |   return Qnil;
1954 2138 | }
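
The hunk above renames the backend bootstrap wrapper to follow upstream llama.cpp (`llama_init_backend` became `llama_backend_init`) and adds a matching teardown wrapper around `llama_backend_free`. A minimal lifecycle sketch, assuming these are called through the module functions `LLaMACpp.backend_init` and `LLaMACpp.backend_free` registered later in this file:

    require 'llama_cpp'

    # Initialize the backend once per process; the keyword maps to the
    # `numa` kwarg parsed by rb_llama_llama_backend_init above.
    LLaMACpp.backend_init(numa: false)

    begin
      # ... load a model and evaluate prompts here ...
    ensure
      # Tear down backend-wide state before the process exits.
      LLaMACpp.backend_free
    end
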
@@ -2010,6 +2194,10 @@ static VALUE rb_llama_mlock_supported(VALUE self) {

2010 2194 |   return llama_mlock_supported() ? Qtrue : Qfalse;
2011 2195 | }
2012 2196 | 
     2197 | +static VALUE rb_llama_max_devices(VALUE self) {
     2198 | +  return INT2NUM(llama_max_devices());
     2199 | +}
     2200 | +
2013 2201 | extern "C" void Init_llama_cpp(void) {
2014 2202 |   rb_mLLaMACpp = rb_define_module("LLaMACpp");
2015 2203 | 
@@ -2021,7 +2209,8 @@ extern "C" void Init_llama_cpp(void) {

2021 2209 |   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
2022 2210 |   RbLLaMAModelQuantizeParams::define_class(rb_mLLaMACpp);
2023 2211 | 
2024      | -  rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, -1);
     2212 | +  rb_define_module_function(rb_mLLaMACpp, "backend_init", rb_llama_llama_backend_init, -1);
     2213 | +  rb_define_module_function(rb_mLLaMACpp, "backend_free", rb_llama_llama_backend_free, 0);
2025 2214 |   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
2026 2215 |   rb_define_module_function(rb_mLLaMACpp, "token_bos", rb_llama_token_bos, 0);
2027 2216 |   rb_define_module_function(rb_mLLaMACpp, "token_eos", rb_llama_token_eos, 0);
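
At the Ruby level this hunk is the user-visible rename: the old `init_backend` registration is removed (its exact name is inferred from the truncated line above and the C function it bound) and `backend_init` plus the new `backend_free` take its place. A before/after sketch for callers migrating across the change:

    # llama_cpp 0.3.2 (registration removed above; name inferred):
    # LLaMACpp.init_backend(numa: true)

    # llama_cpp 0.3.4:
    LLaMACpp.backend_init(numa: true)
    # ... use the library ...
    LLaMACpp.backend_free
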
@@ -2029,6 +2218,7 @@ extern "C" void Init_llama_cpp(void) {

2029 2218 |   rb_define_module_function(rb_mLLaMACpp, "print_system_info", rb_llama_print_system_info, 0);
2030 2219 |   rb_define_module_function(rb_mLLaMACpp, "mmap_supported?", rb_llama_mmap_supported, 0);
2031 2220 |   rb_define_module_function(rb_mLLaMACpp, "mlock_supported?", rb_llama_mlock_supported, 0);
     2221 | +  rb_define_module_function(rb_mLLaMACpp, "max_devices", rb_llama_max_devices, 0);
2032 2222 | 
2033 2223 |   rb_define_const(rb_mLLaMACpp, "LLAMA_MAX_DEVICES", INT2NUM(LLAMA_MAX_DEVICES));
2034 2224 | 
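
With `max_devices` registered alongside the existing `LLAMA_MAX_DEVICES` constant, the device limit compiled into llama.cpp can be read either way from Ruby. A small sketch; the printed value depends on the build (for example, 1 in a CPU-only build):

    require 'llama_cpp'

    puts LLaMACpp.max_devices        # runtime query via llama_max_devices()
    puts LLaMACpp::LLAMA_MAX_DEVICES # compile-time constant exposed above
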