llama_cpp 0.0.1 → 0.0.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a6bf717ec1012d78b7d83f3f7a7546f589fbf368c1b2babc69a99fd28a5d9ff3
- data.tar.gz: 6ab2e2ae4b6410f32890a86b7ac2dbb93ab9e2f43888158b7cbfd9b16f435447
+ metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
+ data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
  SHA512:
- metadata.gz: cd1ae63e518a422dbe3a281a598b18b9397fdf880867f92bad20e56b5a60756a1a929a62879f7aed0c7c24012b87b85353e175c773aeed4f8d87294ba0422cb1
- data.tar.gz: 2828321d0589ac16713745b2770844d5c6fed848ff0efed90304370152650a8e0619657a91184f74c402eb9351800ac3517c20f775faf52db91331d95ac1c87d
+ metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
+ data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  ## [Unreleased]

+ ## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+ - Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+ - Add logits method to LLaMACpp::Context.
+ - Add type signatures.
+ - Add class alias Params for LLaMACpp::ContextParams.
+
+ ## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+ - Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
+ - Add n_threads arguments to generate method.
+
  ## [0.0.1] - 2023-04-02

  - Initial release
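
The 0.0.3 entry adds a logits method and a Params alias for ContextParams. A minimal sketch of how they might be used together; the logits_all writer and the tokenize/eval keyword names are assumptions not confirmed by this diff, and the model path is a placeholder:

```ruby
require 'llama_cpp'

# LLaMACpp::Params is the class alias added in 0.0.3 for LLaMACpp::ContextParams.
params = LLaMACpp::Params.new
params.logits_all = true # assumed writer; keeps logits for every evaluated token

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# logits raises a RuntimeError until the context has been evaluated, so run eval first.
# The tokenize/eval keyword names below are assumptions, not confirmed by this diff.
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

# With logits_all enabled, logits returns a flat array of n_tokens * n_vocab floats.
logits = context.logits
```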
data/README.md CHANGED
@@ -20,6 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executin

  ## Usage

+ Prepare a quantized model file by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+
  ```ruby
  require 'llama_cpp'

@@ -29,6 +31,7 @@ params.seed = 123456
  context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

  puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+ # => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
  ```

  ## Contributing
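
The 0.0.2 changelog entry adds an n_threads argument to generate. A hedged variation on the README example above, assuming the argument is accepted as a keyword:

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.seed = 123456

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# n_threads was added to generate in 0.0.2; passing it as a keyword is an assumption,
# the changelog only records that the argument exists.
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
```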
@@ -217,7 +217,7 @@ public:
  rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
  rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
  rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
- // rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
+ rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
  rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
  rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
  rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -256,6 +256,9 @@ private:
  return Qnil;
  }

+ rb_iv_set(self, "@params", kw_values[1]);
+ rb_iv_set(self, "@has_evaluated", Qfalse);
+
  RB_GC_GUARD(filename);
  return self;
  };
@@ -305,6 +308,9 @@ private:
  return Qnil;
  }

+ rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+ rb_iv_set(self, "@has_evaluated", Qtrue);
+
  return Qnil;
  };

@@ -361,12 +367,44 @@ private:
  return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
  };

+ static VALUE _llama_context_logits(VALUE self) {
+ LLaMAContextWrapper* ptr = get_llama_context(self);
+ if (ptr->ctx == NULL) {
+ rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }
+
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+ const int n_vocab = llama_n_vocab(ptr->ctx);
+ const float* logits = llama_get_logits(ptr->ctx);
+ VALUE output = rb_ary_new();
+ for (int i = 0; i < n_tokens * n_vocab; i++) {
+ rb_ary_push(output, DBL2NUM((double)(logits[i])));
+ }
+
+ return output;
+ };
+
  static VALUE _llama_context_embeddings(VALUE self) {
  LLaMAContextWrapper* ptr = get_llama_context(self);
  if (ptr->ctx == NULL) {
  rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
  return Qnil;
  }
+ LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+ if (!prms_ptr->params.embedding) {
+ rb_raise(rb_eRuntimeError, "embedding parameter is false");
+ return Qnil;
+ }
+ if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+ rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+ return Qnil;
+ }

  const int n_embd = llama_n_embd(ptr->ctx);
  const float* embd = llama_get_embeddings(ptr->ctx);
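
The guards added above make embeddings raise a RuntimeError unless the context was created with the embedding parameter enabled and eval has already run. A sketch of the expected calling pattern, with the same caveats as earlier (the embedding writer and the tokenize/eval keyword names are assumptions):

```ruby
require 'llama_cpp'

params = LLaMACpp::ContextParams.new
params.embedding = true # assumed writer; without it the new guard raises "embedding parameter is false"

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# eval must run before embeddings, or "LLaMA context has not been evaluated" is raised.
# The tokenize/eval keyword names are assumptions, not confirmed by this diff.
tokens = context.tokenize(text: 'Hello', add_bos: true)
context.eval(tokens: tokens, n_past: 0)

embeddings = context.embeddings # flat array of n_embd floats
```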