gpt_neox_client 0.2.0 → 0.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +79 -2
- data/ext/gpt_neox_client/src/main.cpp +5 -0
- data/lib/gpt_neox_client/version.rb +1 -1
- metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 ## [Unreleased]
 
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+```ruby
+require 'gpt_neox_client'
+
+client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+embd = client.embeddings('Hello, world.', normalize: true)
+```
+
 ## [0.2.0] - 2023-09-02
 
 - Add Accelerate framework and Metal build option for macOS.
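Since `normalize: true` returns unit-length vectors, the plain dot product of two results is their cosine similarity. A minimal sketch of that use, assuming the keyword arguments shown in the changelog entry; the model path and prompts are placeholders, and the `zip`/`sum` reduction is ordinary Ruby, not part of the gem's API:

```ruby
require 'gpt_neox_client'

# Placeholder path: point this at a real GGML-format GPT-NeoX model file.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', n_threads: 8)

# With normalize: true the vectors have unit L2 norm, so the dot
# product of two embeddings is directly their cosine similarity.
a = client.embeddings('The cat sat on the mat.', normalize: true)
b = client.embeddings('A cat was sitting on a mat.', normalize: true)

similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```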
data/ext/gpt_neox_client/gpt_neox_client.cpp CHANGED
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
        rb_raise(rb_eRuntimeError, "failed to predict.");
        return Qnil;
      }
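These two completions hunks are mechanical: they declare the new `embedding` buffer and thread it through the extended `gpt_neox_eval` signature. Nothing in this diff reads that buffer on the completions path, which continues to consume only `logits`.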
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
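The `normalize:` branch above divides every component by the vector's L2 norm, computed with `std::sqrt(std::inner_product(...))`. For readers following along from the Ruby side, here is the same post-processing step on a plain array; `l2_normalize` is a hypothetical helper for illustration, not part of the gem:

```ruby
# Hypothetical helper mirroring the C++ normalization above:
# divide each component by the vector's L2 norm.
def l2_normalize(embedding)
  norm = Math.sqrt(embedding.sum { |v| v * v })
  embedding.map { |v| v / norm }
end

l2_normalize([3.0, 4.0]) # => [0.6, 0.8]
```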
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
data/ext/gpt_neox_client/src/main.cpp CHANGED
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
   const int n_threads,
   const int n_past,
   const std::vector<gpt_vocab::id> & embd_inp,
+  std::vector<float> & embd_d,
   std::vector<float> & embd_w,
   size_t & mem_per_token) {
   const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
   //embd_w.resize(n_vocab*N);
   //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+  embd_d.resize(n_embd);
+  struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+  memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
   // return result for just the last token
   embd_w.resize(n_vocab);
   memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-02 00:00:00.000000000 Z
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: