llama_cpp 0.0.2 → 0.0.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -2
- data/README.md +1 -0
- data/ext/llama_cpp/llama_cpp.cpp +39 -1
- data/ext/llama_cpp/src/ggml.c +587 -485
- data/ext/llama_cpp/src/ggml.h +36 -26
- data/ext/llama_cpp/src/llama.cpp +85 -46
- data/ext/llama_cpp/src/llama.h +17 -0
- data/lib/llama_cpp/version.rb +2 -2
- data/lib/llama_cpp.rb +4 -1
- data/sig/llama_cpp.rbs +52 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ce894c9b013134688dffb18229c6f18073cdc8aceafa7d8a519803ae8ffc8a4
+  data.tar.gz: b9a09f3b7217c120d0eae5e89ecf15a4ccbedcdef92db7d5c4508d03ecd65d3c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a979c8a488ec410f214873664288f618af9363d60b6ef6b3ef44de9bd7486bd223b8b38704eab09c1cec1f210c55e5d08ba03af8d6ddc87c10d8836da983c1de
+  data.tar.gz: 47228be684c3ce577b066b2255482c42f6979c4cce5852c22e85a9f0b66bdcaea58d667c56f2eefef6cfc121822a2761406cd2911abccd754c07e8568bb8550e
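For reference, the published digests can be checked locally. A minimal sketch using Ruby's standard digest and yaml libraries, assuming the .gem archive has been unpacked (a .gem file is a tar archive containing metadata.gz, data.tar.gz, and a gzipped checksums.yaml) and that checksums.yaml has been gunzipped alongside the other two files:

```ruby
require 'digest'
require 'yaml'

# Compare the digests recorded in checksums.yaml against the files
# extracted from the .gem archive (e.g. via `tar -xf llama_cpp-0.0.3.gem`).
checksums = YAML.load_file('checksums.yaml')

%w[metadata.gz data.tar.gz].each do |file|
  actual = Digest::SHA256.file(file).hexdigest
  expected = checksums['SHA256'][file]
  puts format('%s: %s', file, actual == expected ? 'OK' : 'MISMATCH')
end
```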
data/CHANGELOG.md
CHANGED
@@ -1,8 +1,15 @@
 ## [Unreleased]

-## [0.0.2] - 2023-04-02
+## [[0.0.3](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.2...v0.0.3)] - 2023-04-08

-- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
+- Bump bundled llama.cpp from master-5b70e7d to master-698f7b5.
+- Add logits method to LLaMACpp::Context.
+- Add type signatures.
+- Add class alias Params for LLaMACpp::ContextParams.
+
+## [[0.0.2](https://github.com/yoshoku/llama_cpp.rb/compare/v0.0.1...v0.0.2)] - 2023-04-02
+
+- Bump bundled llama.cpp from master-2a98bc1 to master-5b70e7d.
 - Add n_threads arguments to generate method.

 ## [0.0.1] - 2023-04-02
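Taken together, the 0.0.3 entries change the public Ruby surface in two visible ways: the Params alias and the logits reader. A minimal sketch of both, assuming a local quantized model file; the tokenize and eval keyword arguments shown follow the gem's README-style API and may differ in detail:

```ruby
require 'llama_cpp'

# Params is the new class alias, so this is equivalent to
# LLaMACpp::ContextParams.new.
params = LLaMACpp::Params.new
params.seed = 123456

context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

# The new logits method exposes the raw scores from the last evaluation,
# so the context has to be evaluated before calling it.
tokens = context.tokenize(text: 'Hello there.', add_bos: true)
context.eval(tokens: tokens, n_past: 0)
logits = context.logits # => flat Array of Float, n_vocab scores per kept token
```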
data/README.md
CHANGED
@@ -31,6 +31,7 @@ params.seed = 123456
 context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)

 puts LLaMACpp.generate(context, 'Please tell me the largest city in Japan.')
+# => "There are two major cities in Japan, Tokyo and Osaka, which have about 30 million populations."
 ```

 ## Contributing
data/ext/llama_cpp/llama_cpp.cpp
CHANGED
@@ -217,7 +217,7 @@ public:
       rb_define_method(rb_cLLaMAContext, "initialize", RUBY_METHOD_FUNC(_llama_context_initialize), -1);
       rb_define_method(rb_cLLaMAContext, "eval", RUBY_METHOD_FUNC(_llama_context_eval), -1);
       rb_define_method(rb_cLLaMAContext, "tokenize", RUBY_METHOD_FUNC(_llama_context_tokenize), -1);
-
+      rb_define_method(rb_cLLaMAContext, "logits", RUBY_METHOD_FUNC(_llama_context_logits), 0);
       rb_define_method(rb_cLLaMAContext, "embeddings", RUBY_METHOD_FUNC(_llama_context_embeddings), 0);
       rb_define_method(rb_cLLaMAContext, "token_to_str", RUBY_METHOD_FUNC(_llama_context_token_to_str), 1);
       rb_define_method(rb_cLLaMAContext, "sample_top_p_top_k", RUBY_METHOD_FUNC(_llama_context_sample_top_p_top_k), -1);
@@ -256,6 +256,9 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@params", kw_values[1]);
+    rb_iv_set(self, "@has_evaluated", Qfalse);
+
     RB_GC_GUARD(filename);
     return self;
   };
@@ -305,6 +308,9 @@ private:
       return Qnil;
     }

+    rb_iv_set(self, "@n_tokens", INT2NUM(n_tokens));
+    rb_iv_set(self, "@has_evaluated", Qtrue);
+
     return Qnil;
   };

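These two instance variables back the guards added further down: @params lets the C++ side re-read the context parameters, and @has_evaluated stays false until eval runs, so calling the new logits method too early fails loudly rather than returning stale memory. A sketch of that failure mode from Ruby (the error message is taken from the rb_raise call in the next hunk):

```ruby
context = LLaMACpp::Context.new(model_path: '/path/to/ggml-model-q4_0.bin', params: params)
context.logits
# raises RuntimeError: LLaMA context has not been evaluated
```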
@@ -361,12 +367,44 @@ private:
     return str != nullptr ? rb_utf8_str_new_cstr(str) : rb_utf8_str_new_cstr("");
   };

+  static VALUE _llama_context_logits(VALUE self) {
+    LLaMAContextWrapper* ptr = get_llama_context(self);
+    if (ptr->ctx == NULL) {
+      rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }
+
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    const int n_tokens = prms_ptr->params.logits_all ? NUM2INT(rb_iv_get(self, "@n_tokens")) : 1;
+    const int n_vocab = llama_n_vocab(ptr->ctx);
+    const float* logits = llama_get_logits(ptr->ctx);
+    VALUE output = rb_ary_new();
+    for (int i = 0; i < n_tokens * n_vocab; i++) {
+      rb_ary_push(output, DBL2NUM((double)(logits[i])));
+    }
+
+    return output;
+  };
+
   static VALUE _llama_context_embeddings(VALUE self) {
     LLaMAContextWrapper* ptr = get_llama_context(self);
     if (ptr->ctx == NULL) {
       rb_raise(rb_eRuntimeError, "LLaMA context is not initialized");
       return Qnil;
     }
+    LLaMAContextParamsWrapper* prms_ptr = RbLLaMAContextParams::get_llama_context_params(rb_iv_get(self, "@params"));
+    if (!prms_ptr->params.embedding) {
+      rb_raise(rb_eRuntimeError, "embedding parameter is false");
+      return Qnil;
+    }
+    if (rb_iv_get(self, "@has_evaluated") != Qtrue) {
+      rb_raise(rb_eRuntimeError, "LLaMA context has not been evaluated");
+      return Qnil;
+    }

     const int n_embd = llama_n_embd(ptr->ctx);
     const float* embd = llama_get_embeddings(ptr->ctx);