llama_cpp 0.3.0 → 0.3.2
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +9 -0
- data/examples/chat.rb +1 -1
- data/examples/embedding.rb +1 -1
- data/examples/prompt_jp.txt +8 -0
- data/ext/llama_cpp/extconf.rb +2 -2
- data/ext/llama_cpp/llama_cpp.cpp +195 -2
- data/ext/llama_cpp/src/ggml-cuda.cu +499 -118
- data/ext/llama_cpp/src/ggml-cuda.h +1 -4
- data/ext/llama_cpp/src/ggml-metal.m +3 -1
- data/ext/llama_cpp/src/ggml-opencl.cpp +357 -176
- data/ext/llama_cpp/src/ggml.c +690 -1512
- data/ext/llama_cpp/src/ggml.h +88 -62
- data/ext/llama_cpp/src/llama.cpp +230 -261
- data/ext/llama_cpp/src/llama.h +31 -6
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +15 -12
- data/sig/llama_cpp.rbs +21 -1
- metadata +3 -2
checksums.yaml
CHANGED

```diff
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f1fcd28849baae5e90c466665aff4fe5da1d848193ebcf74c3fe333c5674191c
+  data.tar.gz: fcb0c64528d24c5cfad677f17bfd6e1e817a4b8279317ca5b2113302735598b9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c70b5f919feb7a585efbe21b3360254c2f5789504cd73fecee12fd686483c77eeb763ed91a8e7434d5852208555a78f168b358d0895f15b1ea7e774d36d6910a
+  data.tar.gz: f554ad58fc9d68c39b80995b7f424468386b32a5847dbdefbceb1cba53ff7182da35be8599523d82a6daa8fee23667d07e06faedc4c727d52e8fc594d0bc7d3f
```
data/CHANGELOG.md
CHANGED

````diff
@@ -1,3 +1,37 @@
+## [[0.3.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.1...v0.3.2)] - 2023-07-08
+
+- Bump bundled llama.cpp from master-b8c8dda to master-481f793.
+- Add `Timings` class and `timings` method to `Context`:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+context = LLaMACpp::Context.new(model: model)
+timings = context.timings
+
+puts timings.class
+# => LLaMACpp::Timings
+puts timings.t_load_ms
+# => 79.61
+```
+- Expose sampling options as the arguments of the `generate` module function:
+```ruby
+require 'llama_cpp'
+
+# ...
+
+LLaMACpp.generate(context, 'Hello, world.', top_k: 30, top_p: 0.8, temperature: 0.9)
+```
+- Add `ModelQuantizeParams` class; this class had not been exposed because the author forgot to call `rb_define_class`.
+- Minor updates to example scripts, configuration files, and documentation.
+
+## [[0.3.1](https://github.com/yoshoku/llama_cpp.rb/compare/v0.3.0...v0.3.1)] - 2023-07-02
+
+- Bump bundled llama.cpp from master-9d23589 to master-b8c8dda.
+- Use unsigned values for random seed.
+- Add `eval_embd` method to `Context` class.
+
 ## [[0.3.0](https://github.com/yoshoku/llama_cpp.rb/compare/v0.2.2...v0.3.0)] - 2023-06-30
 
 - Add no_k_quants and qkk_64 config options:
````
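The `Timings` readers make it easy to derive throughput figures that `print_timings` only writes to stderr. A small sketch built on the methods listed in the changelog entry above (assumes an initialized `context` that has already evaluated some tokens):

```ruby
timings = context.timings
if timings.n_eval.positive?
  # t_eval_ms is wall time in milliseconds; n_eval is the token count.
  tokens_per_sec = timings.n_eval / (timings.t_eval_ms / 1000.0)
  puts format('eval: %.2f tokens/s over %d tokens', tokens_per_sec, timings.n_eval)
end
```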
data/README.md
CHANGED

````diff
@@ -68,6 +68,15 @@ User:
 
 [chat demo animation]
 
+Japanese chat is also possible using the [Vicuna model on Hugging Face](https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized).
+
+```sh
+$ wget https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_0.bin
+$ ruby chat.rb --model ggml-vicuna-7b-1.1-q4_0.bin --file prompt_jp.txt
+```
+
+[Japanese chat demo animation]
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/llama_cpp.rb.
````
data/examples/chat.rb
CHANGED

```diff
@@ -33,7 +33,7 @@ class Chat < Thor # rubocop:disable Metrics/ClassLength, Style/Documentation
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
     context = LLaMACpp::Context.new(model: model)
```
data/examples/embedding.rb
CHANGED

```diff
@@ -18,7 +18,7 @@ class Embedding < Thor # rubocop:disable Style/Documentation
   option :n_gpu_layers, type: :numeric, desc: 'number of layers on GPU', default: 0
   def main # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     params = LLaMACpp::ContextParams.new
-    params.seed = options[:seed]
+    params.seed = options[:seed] if options[:seed] != -1
     params.n_gpu_layers = options[:n_gpu_layers]
     params.embedding = true
     model = LLaMACpp::Model.new(model_path: options[:model], params: params)
```
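Both example scripts now apply the same guard: since the seed became unsigned in 0.3.1, the Thor CLI default of `-1` can no longer be assigned directly and instead acts as a "no seed given" sentinel. A minimal sketch of the pattern:

```ruby
require 'llama_cpp'

seed = -1 # the CLI default, meaning "no seed given"

params = LLaMACpp::ContextParams.new
# Negative seeds are rejected by the extension, so only assign real seeds;
# otherwise the library's default (randomized at model load) is kept.
params.seed = seed if seed != -1
```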
data/ext/llama_cpp/extconf.rb
CHANGED

```diff
@@ -7,8 +7,8 @@ abort 'libstdc++ is not found.' unless have_library('stdc++')
 
 $srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
 $srcs << 'ggml-opencl.cpp' if with_config('clblast')
-$CFLAGS << ' -w'
-$CXXFLAGS << ' -std=c++11'
+$CFLAGS << ' -w -DNDEBUG'
+$CXXFLAGS << ' -std=c++11 -DNDEBUG'
 $INCFLAGS << ' -I$(srcdir)/src'
 $VPATH << '$(srcdir)/src'
 
```
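The new `-DNDEBUG` define compiles the bundled C/C++ sources with `assert()` removed, the usual release-build setting. For context, a minimal extconf.rb sketch of the pattern above (the `create_makefile` target name is an assumption): `with_config('clblast')` is mkmf's hook for an install-time switch, passed as something like `gem install llama_cpp -- --with-clblast`.

```ruby
require 'mkmf'

$srcs = %w[ggml.c llama.cpp llama_cpp.cpp]
$srcs << 'ggml-opencl.cpp' if with_config('clblast') # opt-in OpenCL backend

# NDEBUG strips assert() from the bundled sources in release builds.
$CFLAGS << ' -w -DNDEBUG'
$CXXFLAGS << ' -std=c++11 -DNDEBUG'

create_makefile('llama_cpp/llama_cpp')
```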
data/ext/llama_cpp/llama_cpp.cpp
CHANGED

```diff
@@ -1,8 +1,8 @@
-
 #include "llama_cpp.h"
 
 VALUE rb_mLLaMACpp;
 VALUE rb_cLLaMAModel;
+VALUE rb_cLLaMATimings;
 VALUE rb_cLLaMAContext;
 VALUE rb_cLLaMAContextParams;
 VALUE rb_cLLaMAModelQuantizeParams;
@@ -256,6 +256,111 @@ const rb_data_type_t RbLLaMATokenDataArray::llama_token_data_array_type = {
   RUBY_TYPED_FREE_IMMEDIATELY
 };
 
+class LLaMATimingsWrapper {
+public:
+  struct llama_timings timings;
+
+  LLaMATimingsWrapper(){};
+
+  ~LLaMATimingsWrapper(){};
+};
+
+class RbLLaMATimings {
+public:
+  static VALUE llama_timings_alloc(VALUE self) {
+    LLaMATimingsWrapper* ptr = (LLaMATimingsWrapper*)ruby_xmalloc(sizeof(LLaMATimingsWrapper));
+    new (ptr) LLaMATimingsWrapper();
+    return TypedData_Wrap_Struct(self, &llama_timings_type, ptr);
+  }
+
+  static void llama_timings_free(void* ptr) {
+    ((LLaMATimingsWrapper*)ptr)->~LLaMATimingsWrapper();
+    ruby_xfree(ptr);
+  }
+
+  static size_t llama_timings_size(const void* ptr) {
+    return sizeof(*((LLaMATimingsWrapper*)ptr));
+  }
+
+  static LLaMATimingsWrapper* get_llama_timings(VALUE self) {
+    LLaMATimingsWrapper* ptr;
+    TypedData_Get_Struct(self, LLaMATimingsWrapper, &llama_timings_type, ptr);
+    return ptr;
+  }
+
+  static void define_class(VALUE outer) {
+    rb_cLLaMATimings = rb_define_class_under(outer, "Timings", rb_cObject);
+    rb_define_alloc_func(rb_cLLaMATimings, llama_timings_alloc);
+    rb_define_method(rb_cLLaMATimings, "t_start_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_start_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "t_end_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_end_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "t_load_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_load_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "t_sample_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_sample_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "t_p_eval_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_p_eval_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "t_eval_ms", RUBY_METHOD_FUNC(_llama_timings_get_t_eval_ms), 0);
+    rb_define_method(rb_cLLaMATimings, "n_sample", RUBY_METHOD_FUNC(_llama_timings_get_n_sample), 0);
+    rb_define_method(rb_cLLaMATimings, "n_p_eval", RUBY_METHOD_FUNC(_llama_timings_get_n_p_eval), 0);
+    rb_define_method(rb_cLLaMATimings, "n_eval", RUBY_METHOD_FUNC(_llama_timings_get_n_eval), 0);
+  }
+
+private:
+  static const rb_data_type_t llama_timings_type;
+
+  static VALUE _llama_timings_get_t_start_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_start_ms);
+  }
+
+  static VALUE _llama_timings_get_t_end_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_end_ms);
+  }
+
+  static VALUE _llama_timings_get_t_load_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_load_ms);
+  }
+
+  static VALUE _llama_timings_get_t_sample_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_sample_ms);
+  }
+
+  static VALUE _llama_timings_get_t_p_eval_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_p_eval_ms);
+  }
+
+  static VALUE _llama_timings_get_t_eval_ms(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return DBL2NUM(ptr->timings.t_eval_ms);
+  }
+
+  static VALUE _llama_timings_get_n_sample(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return INT2NUM(ptr->timings.n_sample);
+  }
+
+  static VALUE _llama_timings_get_n_p_eval(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return INT2NUM(ptr->timings.n_p_eval);
+  }
+
+  static VALUE _llama_timings_get_n_eval(VALUE self) {
+    LLaMATimingsWrapper* ptr = get_llama_timings(self);
+    return INT2NUM(ptr->timings.n_eval);
+  }
+};
+
+const rb_data_type_t RbLLaMATimings::llama_timings_type = {
+  "RbLLaMATimings",
+  { NULL,
+    RbLLaMATimings::llama_timings_free,
+    RbLLaMATimings::llama_timings_size },
+  NULL,
+  NULL,
+  RUBY_TYPED_FREE_IMMEDIATELY
+};
+
 class LLaMAContextParamsWrapper {
 public:
   struct llama_context_params params;
```
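Each reader defined above copies one field of the underlying `struct llama_timings` into a Ruby Float (`DBL2NUM`) or Integer (`INT2NUM`). A quick way to dump them all from Ruby (a sketch, assuming an initialized `context`):

```ruby
timings = context.timings
%i[t_start_ms t_end_ms t_load_ms t_sample_ms t_p_eval_ms t_eval_ms
   n_sample n_p_eval n_eval].each do |reader|
  puts format('%-12s %p', reader, timings.public_send(reader))
end
```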
```diff
@@ -404,6 +509,10 @@ private:
   // seed
   static VALUE _llama_context_params_set_seed(VALUE self, VALUE seed) {
     LLaMAContextParamsWrapper* ptr = get_llama_context_params(self);
+    if (NUM2INT(seed) < 0) {
+      rb_raise(rb_eArgError, "seed must be non-negative");
+      return Qnil;
+    }
     ptr->params.seed = NUM2INT(seed);
     return INT2NUM(ptr->params.seed);
   };
```
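On the Ruby side this guard surfaces as an `ArgumentError`. A minimal sketch:

```ruby
params = LLaMACpp::ContextParams.new
params.seed = 42 # fine; stored as an unsigned value

begin
  params.seed = -1
rescue ArgumentError => e
  warn e.message # the negative value is rejected before it reaches llama.cpp
end
```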
```diff
@@ -685,6 +794,10 @@ private:
     LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(kw_values[1]);
     LLaMAModelWrapper* model_ptr = get_llama_model(self);
 
+    if (prms_ptr->params.seed == LLAMA_DEFAULT_SEED) {
+      prms_ptr->params.seed = time(NULL);
+    }
+
     try {
       model_ptr->model = llama_load_model_from_file(StringValueCStr(filename), prms_ptr->params);
     } catch (const std::runtime_error& e) {
@@ -848,6 +961,7 @@ public:
     rb_define_alloc_func(rb_cLLaMAContext, llama_context_alloc);
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
+    rb_define_method(rb_cLLaMAContext, "eval_embd", RUBY_METHOD_FUNC(_llama_context_eval_embd), -1);
     rb_define_method(rb_cLLaMAContext, "eval_export", RUBY_METHOD_FUNC(_llama_context_eval_export), 1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
     rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
@@ -857,6 +971,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "n_vocab", RUBY_METHOD_FUNC(_llama_context_n_vocab), 0);
     rb_define_method(rb_cLLaMAContext, "n_ctx", RUBY_METHOD_FUNC(_llama_context_n_ctx), 0);
     rb_define_method(rb_cLLaMAContext, "n_embd", RUBY_METHOD_FUNC(_llama_context_n_embd), 0);
+    rb_define_method(rb_cLLaMAContext, "timings", RUBY_METHOD_FUNC(_llama_context_get_timings), 0);
     rb_define_method(rb_cLLaMAContext, "print_timings", RUBY_METHOD_FUNC(_llama_context_print_timings), 0);
     rb_define_method(rb_cLLaMAContext, "reset_timings", RUBY_METHOD_FUNC(_llama_context_reset_timings), 0);
     rb_define_method(rb_cLLaMAContext, "kv_cache_token_count", RUBY_METHOD_FUNC(_llama_context_kv_cache_token_count), 0);
```
```diff
@@ -971,6 +1086,61 @@ private:
     return Qnil;
   };
 
+  static VALUE _llama_context_eval_embd(int argc, VALUE* argv, VALUE self) {
+    VALUE kw_args = Qnil;
+    ID kw_table[4] = { rb_intern("embd"), rb_intern("n_past"), rb_intern("n_tokens"), rb_intern("n_threads") };
+    VALUE kw_values[4] = { Qundef, Qundef, Qundef, Qundef };
+    rb_scan_args(argc, argv, ":", &kw_args);
+    rb_get_kwargs(kw_args, kw_table, 2, 2, kw_values);
+
+    if (!RB_TYPE_P(kw_values[0], T_ARRAY)) {
+      rb_raise(rb_eArgError, "embd must be an Array");
+      return Qnil;
+    }
+    if (!RB_INTEGER_TYPE_P(kw_values[1])) {
+      rb_raise(rb_eArgError, "n_past must be an integer");
+      return Qnil;
+    }
+    if (kw_values[2] != Qundef && !RB_INTEGER_TYPE_P(kw_values[2])) {
+      rb_raise(rb_eArgError, "n_tokens must be an integer");
+      return Qnil;
+    }
+    if (kw_values[3] != Qundef && !RB_INTEGER_TYPE_P(kw_values[3])) {
+      rb_raise(rb_eArgError, "n_threads must be an integer");
+      return Qnil;
+    }
+
+    const size_t tokens_len = RARRAY_LEN(kw_values[0]);
+    std::vector<float> embd(tokens_len);
+    for (size_t i = 0; i < tokens_len; i++) {
+      VALUE el = rb_ary_entry(kw_values[0], i);
+      if (!RB_FLOAT_TYPE_P(el)) {
+        rb_raise(rb_eArgError, "embd must be an array of floats");
+        return Qnil;
+      }
+      embd[i] = NUM2DBL(el);
+    }
+
+    const int n_tokens = kw_values[2] == Qundef ? (int)tokens_len : NUM2INT(kw_values[2]);
+    const int n_past = NUM2INT(kw_values[1]);
+    const int n_threads = kw_values[3] == Qundef ? 1 : NUM2INT(kw_values[3]);
+
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (llama_eval_embd(ptr->ctx, embd.data(), n_tokens, n_past, n_threads) != 0) {
+      rb_raise(rb_eRuntimeError, "Failed to evaluate");
+      return Qnil;
+    }
+
+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
+    return Qnil;
+  }
+
   static VALUE _llama_context_eval_export(VALUE self, VALUE fname_) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
```
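As the implementation above shows, `eval_embd` takes raw embedding values instead of token ids: `embd:` and `n_past:` are required keywords, while `n_tokens:` defaults to the array length and `n_threads:` to 1, and every element must be a Float. A sketch of calling it from Ruby (the random values are placeholders for real embeddings):

```ruby
# Feed one position's worth of embeddings directly into the model.
embd = Array.new(context.n_embd) { rand } # must be Floats

context.eval_embd(embd: embd, n_past: 0, n_threads: 4)
logits = context.logits # logits for the evaluated position are now available
```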
```diff
@@ -1163,6 +1333,18 @@ private:
     return INT2NUM(llama_n_embd(ptr->ctx));
   };
 
+  static VALUE _llama_context_get_timings(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    VALUE tm_obj = rb_funcall(rb_cLLaMATimings, rb_intern("new"), 0);
+    LLaMATimingsWrapper* tm_ptr = RbLLaMATimings::get_llama_timings(tm_obj);
+    tm_ptr->timings = llama_get_timings(ptr->ctx);
+    return tm_obj;
+  }
+
   static VALUE _llama_context_print_timings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
```
```diff
@@ -1198,7 +1380,11 @@ private:
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
-
+    if (NUM2INT(seed_) < 0) {
+      rb_raise(rb_eArgError, "seed must be a non-negative integer");
+      return Qnil;
+    }
+
     const uint32_t seed = NUM2INT(seed_);
     llama_set_rng_seed(ptr->ctx, seed);
     return Qnil;
   };
```
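The same non-negative rule applies when re-seeding an existing context. A sketch, assuming the wrapper exposes this function as `Context#set_rng_seed` (the Ruby method name is not shown in this hunk; it mirrors `llama_set_rng_seed`):

```ruby
context.set_rng_seed(12_345) # deterministic sampling from here on
# context.set_rng_seed(-1)   # would raise ArgumentError: negative seeds are rejected
```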
```diff
@@ -1830,8 +2016,10 @@ extern "C" void Init_llama_cpp(void) {
   RbLLaMATokenData::define_class(rb_mLLaMACpp);
   RbLLaMATokenDataArray::define_class(rb_mLLaMACpp);
   RbLLaMAModel::define_class(rb_mLLaMACpp);
+  RbLLaMATimings::define_class(rb_mLLaMACpp);
   RbLLaMAContext::define_class(rb_mLLaMACpp);
   RbLLaMAContextParams::define_class(rb_mLLaMACpp);
+  RbLLaMAModelQuantizeParams::define_class(rb_mLLaMACpp);
 
   rb_define_module_function(rb_mLLaMACpp, "init_backend", rb_llama_llama_init_backend, -1);
   rb_define_module_function(rb_mLLaMACpp, "model_quantize", rb_llama_model_quantize, -1);
@@ -1901,6 +2089,11 @@ extern "C" void Init_llama_cpp(void) {
   ss_magic << std::showbase << std::hex << LLAMA_SESSION_MAGIC;
   rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_MAGIC", rb_str_new2(ss_magic.str().c_str()));
 
+  ss_magic.str("");
+  ss_magic.clear(std::stringstream::goodbit);
+  ss_magic << std::showbase << std::hex << LLAMA_DEFAULT_SEED;
+  rb_define_const(rb_mLLaMACpp, "LLAMA_DEFAULT_SEED", rb_str_new2(ss_magic.str().c_str()));
+
   rb_define_const(rb_mLLaMACpp, "LLAMA_FILE_VERSION", rb_str_new2(std::to_string(LLAMA_FILE_VERSION).c_str()));
   rb_define_const(rb_mLLaMACpp, "LLAMA_SESSION_VERSION", rb_str_new2(std::to_string(LLAMA_SESSION_VERSION).c_str()));
 }
```
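Because the constant is rendered through `std::hex` with `std::showbase`, `LLAMA_DEFAULT_SEED` reaches Ruby as a hex string rather than an Integer. A sketch (the exact value depends on the bundled llama.h; `"0xffffffff"` is an assumption):

```ruby
require 'llama_cpp'

puts LLaMACpp::LLAMA_DEFAULT_SEED            # => e.g. "0xffffffff"
seed = LLaMACpp::LLAMA_DEFAULT_SEED.to_i(16) # convert back to an Integer if needed
```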