gpt_neox_client 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e76735b1c4c6a4e228620bd4cd3ab20d02d0b20505eb85acbcab263301ad4e49
-  data.tar.gz: 05d285d7b1daa24408c1087f0c748a456a8398d45c59b3b311e1d0a4413df00a
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz: dda9974e3d4d1023ec0e8783922c6cb779b41d0083aa26bdfb73e69778de353eee9b26d5185ea0f160bec07df89ec9f34267dd2c42e02f0d95bc224fb4b4a43a
-  data.tar.gz: 0a2c389774a0e49b8b6f4ee2dac8bea96f5d1608ef4b4ac9cef95f29480fc23f4cbc19c14be97781e9076eee3a9fd247883c979390ff1fd2385159c42a12189e
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 ## [Unreleased]
 
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+  ```ruby
+  require 'gpt_neox_client'
+
+  client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+  embd = client.embeddings('Hello, world.', normalize: true)
+  ```
+
 ## [0.2.0] - 2023-09-02
 
 - Add Accelerate framework and Metal build option for macOS.
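The new `embeddings` method returns the final hidden state of the last prompt token as an `Array<Float>`. With `normalize: true` the vector is scaled to unit length, so the dot product of two such vectors is their cosine similarity. A minimal sketch, assuming a placeholder model path and the constructor arguments from the CHANGELOG example above:

```ruby
require 'gpt_neox_client'

# Hypothetical model path; any GGML-format GPT-NeoX model is used the same way.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)

a = client.embeddings('Hello, world.', normalize: true)
b = client.embeddings('Goodbye, world.', normalize: true)

# Unit-length vectors make the dot product a cosine similarity.
similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```

The hunks below, from the gem's C++ extension, implement this method.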
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
         rb_raise(rb_eRuntimeError, "failed to predict.");
         return Qnil;
       }
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
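The loop in `gpt_neox_client_embeddings` above feeds the prompt tokens to `gpt_neox_eval` in chunks of `n_batch` (default 8); the batch size only changes how many tokens each evaluation call consumes, not the resulting vector. A sketch of what that means at the Ruby level, reusing the hypothetical `client` from the earlier example:

```ruby
text = 'GPT-NeoX is an autoregressive language model.'

# n_batch trades memory per eval call against the number of calls;
# both runs evaluate the same tokens, so the vectors should come out identical.
fast = client.embeddings(text, n_batch: 64)
slow = client.embeddings(text, n_batch: 1)
```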
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
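The implementation validates its inputs before evaluating: a non-integer `n_batch` or a prompt longer than the model context raises `ArgumentError`, and a failed evaluation raises `RuntimeError`. For example:

```ruby
begin
  client.embeddings('Hello, world.', n_batch: 'eight')
rescue ArgumentError => e
  puts e.message # => "n_batch must be an integer"
end
```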
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
   const int n_threads,
   const int n_past,
   const std::vector<gpt_vocab::id> & embd_inp,
+  std::vector<float> & embd_d,
   std::vector<float> & embd_w,
   size_t & mem_per_token) {
   const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
   //embd_w.resize(n_vocab*N);
   //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+  embd_d.resize(n_embd);
+  struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+  memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
   // return result for just the last token
   embd_w.resize(n_vocab);
   memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
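`gpt_neox_eval` now also copies the last token's `n_embd`-wide activation out of the second-to-last node of the compute graph into `embd_d`, alongside the logits in `embd_w`. The `normalize:` option divides that vector by its L2 norm; the same post-processing can be reproduced in Ruby when the raw vector is requested:

```ruby
raw = client.embeddings('Hello, world.') # normalize: false is the default

# Same arithmetic as the extension's std::inner_product-based L2 normalization.
norm = Math.sqrt(raw.sum { |v| v * v })
unit = raw.map { |v| v / norm }
```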
@@ -3,5 +3,5 @@
 # GPTNeoXClient is a Ruby client for GPT-NeoX.
 class GPTNeoXClient
   # The version of GPTNeoXClient you are using.
-  VERSION = '0.2.0'
+  VERSION = '0.3.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-02 00:00:00.000000000 Z
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: