gpt_neox_client 0.2.0 → 0.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +79 -2
- data/ext/gpt_neox_client/src/main.cpp +5 -0
- data/lib/gpt_neox_client/version.rb +1 -1
- metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 ## [Unreleased]
 
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+```ruby
+require 'gpt_neox_client'
+
+client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+embd = client.embeddings('Hello, world.', normalize: true)
+```
+
 ## [0.2.0] - 2023-09-02
 
 - Add Accelerate framework and Metal build option for macOS.
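Since `normalize: true` returns unit-length vectors, the plain dot product of two results is their cosine similarity. A minimal sketch of that use, assuming the keyword arguments shown in the changelog entry; the model path and prompts are placeholders, and the `zip`/`sum` reduction is ordinary Ruby, not part of the gem's API:

```ruby
require 'gpt_neox_client'

# Placeholder path: point this at a real GGML-format GPT-NeoX model file.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', n_threads: 8)

# With normalize: true the vectors have unit L2 norm, so the dot
# product of two embeddings is directly their cosine similarity.
a = client.embeddings('The cat sat on the mat.', normalize: true)
b = client.embeddings('A cat was sitting on a mat.', normalize: true)

similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```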
data/ext/gpt_neox_client/gpt_neox_client.cpp CHANGED
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
        rb_raise(rb_eRuntimeError, "failed to predict.");
        return Qnil;
      }
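These two completions hunks are mechanical: they declare the new `embedding` buffer and thread it through the extended `gpt_neox_eval` signature. Nothing in this diff reads that buffer on the completions path, which continues to consume only `logits`.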
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
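The `normalize:` branch above divides every component by the vector's L2 norm, computed with `std::sqrt(std::inner_product(...))`. For readers following along from the Ruby side, here is the same post-processing step on a plain array; `l2_normalize` is a hypothetical helper for illustration, not part of the gem:

```ruby
# Hypothetical helper mirroring the C++ normalization above:
# divide each component by the vector's L2 norm.
def l2_normalize(embedding)
  norm = Math.sqrt(embedding.sum { |v| v * v })
  embedding.map { |v| v / norm }
end

l2_normalize([3.0, 4.0]) # => [0.6, 0.8]
```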
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
data/ext/gpt_neox_client/src/main.cpp CHANGED
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
   const int n_threads,
   const int n_past,
   const std::vector<gpt_vocab::id> & embd_inp,
+  std::vector<float> & embd_d,
   std::vector<float> & embd_w,
   size_t & mem_per_token) {
   const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
   //embd_w.resize(n_vocab*N);
   //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+  embd_d.resize(n_embd);
+  struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+  memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
   // return result for just the last token
   embd_w.resize(n_vocab);
   memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-02 00:00:00.000000000 Z
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: