gpt_neox_client 0.2.0 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: e76735b1c4c6a4e228620bd4cd3ab20d02d0b20505eb85acbcab263301ad4e49
-   data.tar.gz: 05d285d7b1daa24408c1087f0c748a456a8398d45c59b3b311e1d0a4413df00a
+   metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+   data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
  SHA512:
-   metadata.gz: dda9974e3d4d1023ec0e8783922c6cb779b41d0083aa26bdfb73e69778de353eee9b26d5185ea0f160bec07df89ec9f34267dd2c42e02f0d95bc224fb4b4a43a
-   data.tar.gz: 0a2c389774a0e49b8b6f4ee2dac8bea96f5d1608ef4b4ac9cef95f29480fc23f4cbc19c14be97781e9076eee3a9fd247883c979390ff1fd2385159c42a12189e
+   metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+   data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
  ## [Unreleased]

+ ## [0.3.0] - 2023-09-06
+
+ - Add `embeddings` method.
+ ```ruby
+ require 'gpt_neox_client'
+
+ client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+ embd = client.embeddings('Hello, world.', normalize: true)
+ ```
+
  ## [0.2.0] - 2023-09-02

  - Add Accelerate framework and Metal build option for macOS.
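
Since `embeddings` returns a plain array of floats, similarity computations need no extra dependencies. As a minimal usage sketch on top of the new method (the model path and prompts below are placeholders, not files shipped with the gem):

```ruby
require 'gpt_neox_client'

# Placeholder model path; any GGML-format GPT-NeoX model file works here.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', n_threads: 8)

# With normalize: true both vectors have unit length, so their dot
# product equals their cosine similarity.
a = client.embeddings('The cat sat on the mat.', normalize: true)
b = client.embeddings('A cat lies on a rug.', normalize: true)

similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```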
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
    const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));

    const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+   std::vector<float> embedding;
    std::vector<float> logits;
    size_t mem_per_token = 0;
-   gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+   gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);

    int n_past = 0;
    int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {

    while (n_sampled < n_predict) {
      if (embd.size() > 0) {
-       if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+       if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
          rb_raise(rb_eRuntimeError, "failed to predict.");
          return Qnil;
        }
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
    return rb_utf8_str_new_cstr(completions.c_str());
  }

+ static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+   VALUE prompt_ = Qnil;
+   VALUE kw_args = Qnil;
+   rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+   ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+   VALUE kw_values[2] = { Qundef, Qundef };
+   rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+   if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+     rb_raise(rb_eArgError, "n_batch must be an integer");
+     return Qnil;
+   }
+
+   std::string prompt(StringValueCStr(prompt_));
+   const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+   const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+   gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+   gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+   std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+   if (embd_inp.size() > model->hparams.n_ctx) {
+     rb_raise(rb_eArgError, "prompt is too long");
+     return Qnil;
+   }
+
+   std::vector<float> embedding;
+   std::vector<float> logits;
+   size_t mem_per_token = 0;
+   gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+   int n_past = 0;
+   std::vector<gpt_vocab::id> embd;
+   while (!embd_inp.empty()) {
+     const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+     embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+     if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+       rb_raise(rb_eRuntimeError, "failed to predict.");
+       return Qnil;
+     }
+     n_past += n_tokens;
+     embd.clear();
+     embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+   }
+
+   if (normalize) {
+     const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+     for (auto& v : embedding) v /= norm;
+   }
+
+   VALUE res = rb_ary_new2(embedding.size());
+   for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+   RB_GC_GUARD(prompt_);
+   return res;
+ }
+

  extern "C" void Init_gpt_neox_client(void) {
    /**
     * Document-class: GPTNeoXClient
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
     * @return [String]
     */
    rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+   /**
+    * Generates embeddings.
+    *
+    * @example
+    *   require "gpt_neox_client"
+    *
+    *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+    *   client.embeddings("Hello, my name is")
+    *
+    * @overload embeddings(text, n_batch: 8, normalize: false)
+    *   @param [String] text The text.
+    *   @param [Integer] n_batch The number of tokens to evaluate at once.
+    *   @param [Boolean] normalize The flag to normalize the embeddings.
+    *   @return [Array<Float>]
+    */
+   rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
    /**
     * Returns the path to the model.
     * @return [String]
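
As the extension code above shows, `normalize: true` divides the vector by its L2 norm (`std::sqrt` of the `std::inner_product` of the vector with itself). The same post-processing can be reproduced in Ruby on an unnormalized result; a sketch, assuming a `client` constructed as in the changelog example:

```ruby
raw = client.embeddings('Hello, world.') # normalize defaults to false

# Mirror of the C++ normalization branch: divide each element by the L2 norm.
norm = Math.sqrt(raw.sum { |v| v * v })
unit = raw.map { |v| v / norm }
```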
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
    const int n_threads,
    const int n_past,
    const std::vector<gpt_vocab::id> & embd_inp,
+   std::vector<float> & embd_d,
    std::vector<float> & embd_w,
    size_t & mem_per_token) {
  const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
    //embd_w.resize(n_vocab*N);
    //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);

+   embd_d.resize(n_embd);
+   struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+   memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
    // return result for just the last token
    embd_w.resize(n_vocab);
    memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
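
Here `embd_d` receives the last token's hidden state: `gf.nodes[gf.n_nodes - 2]` appears to be the graph node feeding the final vocabulary projection, and the `n_embd * (N - 1)` offset selects the last of the `N` evaluated tokens, mirroring the `n_vocab * (N - 1)` offset used for the logits just below.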
@@ -3,5 +3,5 @@
  # GPTNeoXClient is a Ruby client for GPT-NeoX.
  class GPTNeoXClient
    # The version of GPTNeoXClient you are using.
-   VERSION = '0.2.0'
+   VERSION = '0.3.0'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: gpt_neox_client
  version: !ruby/object:Gem::Version
-   version: 0.2.0
+   version: 0.3.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-09-02 00:00:00.000000000 Z
+ date: 2023-09-06 00:00:00.000000000 Z
  dependencies: []
  description: gpt_neox_client is a simple client for GPT-NeoX.
  email: