llama_cpp 0.0.1 → 0.0.3
This diff shows the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -0
- data/ext/llama_cpp/llama_cpp.cpp +39 -1
- data/ext/llama_cpp/src/ggml.c +914 -509
- data/ext/llama_cpp/src/ggml.h +42 -27
- data/ext/llama_cpp/src/llama.cpp +293 -303
- data/ext/llama_cpp/src/llama.h +19 -2
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +6 -2
- data/sig/llama_cpp.rbs +52 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
+  data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
+  data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
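For reference, the new digests above can be checked locally once the gem's payload is extracted; a minimal sketch using Ruby's standard Digest library (the local file path is illustrative):

```ruby
require 'digest'

# Illustrative path to the extracted gem payload.
path = 'data.tar.gz'
expected = 'b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c'

# Digest::SHA256.file streams the file, so large archives are fine.
actual = Digest::SHA256.file(path).hexdigest
raise "checksum mismatch for #{path}" unless actual == expected
```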
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
 ## [Unreleased]
 
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08
+
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
+- Add n_threads arguments to generate method.
+
 ## [0.0.1] - 2023-04-02
 
 - Initial release
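Taken together, the 0.0.3 entries above amount to a small API surface change. A minimal sketch of how they might be used; the model path is a placeholder, and the eval/tokenize keyword names are assumptions rather than something shown in this diff:

```ruby
require 'llama_cpp'

# Params is the new 0.0.3 alias for ContextParams.
params = LLaMACpp::Params.new
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# After an eval, the new logits method returns the raw output scores
# as a flat Ruby Array of Floats.
context.eval(tokens: context.tokenize(text: 'Hello'), n_past: 0)
p context.logits.first(5)
```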
data/README.md
CHANGED
@@ -20,6 +20,8 @@ If bundler is not being used to manage dependencies, install the gem by executing:
 
 ## Usage
 
+Prepare a quantized model file by referring to [the usage section on the llama.cpp README](https://github.com/ggerganov/llama.cpp#usage).
+
 ```ruby
 require 'llama_cpp'
 
@@ -29,6 +31,7 @@ params.seed = 123456
 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
 
 puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```
 
 ## Contributing
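Per the 0.0.2 changelog entry above, generate also accepts an n_threads argument; a hedged sketch (the keyword name is taken from the changelog wording, and 4 is an arbitrary thread count):

```ruby
puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.', n_threads: 4)
```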
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
     rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
     rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
     rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+    rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
     rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
     rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
     rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -256,6 +256,9 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
     return self;
   };
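The @has_evaluated flag initialized to Qfalse here is what backs the new guards further down: reading logits or embeddings before any eval call now raises a RuntimeError instead of returning stale data. A sketch of that behavior from the Ruby side (model path illustrative, error message taken verbatim from the code below):

```ruby
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin',
                                params: LLaMACpp::ContextParams.new)

begin
  context.logits # no eval has run, so @has_evaluated is still false
rescue RuntimeError => e
  puts e.message # => "LLaMA context has not been evaluated"
end
```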
@@ -305,6 +308,9 @@ private:
       return Qnil;
     }
 
+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };
 
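The @n_tokens value recorded after a successful eval determines how much data the new logits method returns: n_tokens * n_vocab floats when logits_all is enabled, otherwise a single row of n_vocab. A sketch, under the assumption that ContextParams exposes a logits_all= writer (by analogy with seed= in the README) and that Context has an n_vocab reader:

```ruby
params = LLaMACpp::ContextParams.new
params.logits_all = true # assumed writer; keeps one logit row per input token
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

tokens = context.tokenize(text: 'Tokyo is')
context.eval(tokens: tokens, n_past: 0)

# The flat array folds into one row of n_vocab logits per token.
rows = context.logits.each_slice(context.n_vocab).to_a
rows.size # => tokens.size
```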
@@ -361,12 +367,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };
 
+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
 
     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);
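The guard added above ties embeddings to the embedding flag in ContextParams: a context created without it now raises instead of reading an unpopulated buffer. A sketch, assuming an embedding= writer by analogy with seed= in the README:

```ruby
params = LLaMACpp::ContextParams.new
params.embedding = true # assumed writer; without it, embeddings raises "embedding parameter is false"
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

context.eval(tokens: context.tokenize(text: 'Hello'), n_past: 0)
vector = context.embeddings # => Array of n_embd Floats
```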