gpt_neox_client 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +79 -2
- data/ext/gpt_neox_client/src/main.cpp +5 -0
- data/lib/gpt_neox_client/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
 ## [Unreleased]
 
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+```ruby
+require 'gpt_neox_client'
+
+client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+embd = client.embeddings('Hello, world.', normalize: true)
+```
+
 ## [0.2.0] - 2023-09-02
 
 - Add Accelerate framework and Metal build option for macOS.
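Note: since `normalize: true` returns a unit-length vector, the dot product of two such embeddings is their cosine similarity. A minimal sketch of that use (the model path and prompts are placeholders, not from the diff):

```ruby
require 'gpt_neox_client'

# Placeholder model path; any GGML-format GPT-NeoX model file would go here.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)

a = client.embeddings('Hello, world.', normalize: true)
b = client.embeddings('Goodbye, world.', normalize: true)

# For unit-length vectors, the dot product equals the cosine similarity.
similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```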
data/ext/gpt_neox_client/gpt_neox_client.cpp
CHANGED
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
         rb_raise(rb_eRuntimeError, "failed to predict.");
         return Qnil;
       }
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
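The `normalize` branch above divides each component by the vector's Euclidean (L2) norm. A minimal sketch of the same arithmetic in plain Ruby (the embedding values here are made up for illustration):

```ruby
# Hypothetical values; the real vector has n_embd components.
embedding = [3.0, 4.0]

# Euclidean norm: square root of the sum of squared components.
norm = Math.sqrt(embedding.sum { |v| v * v })
normalized = embedding.map { |v| v / norm }
# => [0.6, 0.8], a unit-length vector
```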
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   * @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
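The `@overload` above documents the defaults (`n_batch: 8`, `normalize: false`). A hedged usage sketch; the model filename is a placeholder, as in the doc comment's own example:

```ruby
require 'gpt_neox_client'

# Placeholder model file, mirroring the doc comment's example.
client = GPTNeoXClient.new('gpt-neox-f16.bin')

# A larger n_batch feeds more prompt tokens to each gpt_neox_eval call;
# normalize: true scales the result to unit length.
vec = client.embeddings('GPT-NeoX is a transformer model.', n_batch: 16, normalize: true)
vec.length # => the model's hidden size (n_embd)
```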
data/ext/gpt_neox_client/src/main.cpp
CHANGED
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
+        std::vector<float> & embd_d,
         std::vector<float> & embd_w,
         size_t & mem_per_token) {
     const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
     //embd_w.resize(n_vocab*N);
     //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+    embd_d.resize(n_embd);
+    struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+    memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
     // return result for just the last token
     embd_w.resize(n_vocab);
     memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
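The added `memcpy` copies the last token's row out of a flat row-major [N, n_embd] buffer: the offset `n_embd * (N - 1)` skips the first N-1 rows. A tiny Ruby sketch of that offset arithmetic (values are hypothetical):

```ruby
n_embd   = 4  # hidden size per token
n_tokens = 3  # N tokens evaluated in this batch

# Stand-in for the flat float buffer behind ggml_get_data(embeddings).
flat = (0...(n_tokens * n_embd)).map(&:to_f)

# Take n_embd floats starting at row N-1, like the memcpy above.
last_row = flat[n_embd * (n_tokens - 1), n_embd]
# => [8.0, 9.0, 10.0, 11.0]
```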
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: