gpt_neox_client 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e76735b1c4c6a4e228620bd4cd3ab20d02d0b20505eb85acbcab263301ad4e49
-  data.tar.gz: 05d285d7b1daa24408c1087f0c748a456a8398d45c59b3b311e1d0a4413df00a
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz: dda9974e3d4d1023ec0e8783922c6cb779b41d0083aa26bdfb73e69778de353eee9b26d5185ea0f160bec07df89ec9f34267dd2c42e02f0d95bc224fb4b4a43a
-  data.tar.gz: 0a2c389774a0e49b8b6f4ee2dac8bea96f5d1608ef4b4ac9cef95f29480fc23f4cbc19c14be97781e9076eee3a9fd247883c979390ff1fd2385159c42a12189e
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 ## [Unreleased]
 
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+  ```ruby
+  require 'gpt_neox_client'
+
+  client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+  embd = client.embeddings('Hello, world.', normalize: true)
+  ```
+
 ## [0.2.0] - 2023-09-02
 
 - Add Accelerate framework and Metal build option for macOS.
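The new `embeddings` method returns the final hidden state of the last prompt token as an `Array<Float>`. With `normalize: true` the vector is scaled to unit length, so the dot product of two such vectors is their cosine similarity. A minimal sketch, assuming a placeholder model path and the constructor arguments from the CHANGELOG example above:

```ruby
require 'gpt_neox_client'

# Hypothetical model path; any GGML-format GPT-NeoX model is used the same way.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)

a = client.embeddings('Hello, world.', normalize: true)
b = client.embeddings('Goodbye, world.', normalize: true)

# Unit-length vectors make the dot product a cosine similarity.
similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```

The hunks below, from the gem's C++ extension, implement this method.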
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -212,7 +213,7 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
         rb_raise(rb_eRuntimeError, "failed to predict.");
         return Qnil;
       }
@@ -248,6 +249,66 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
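The loop in `gpt_neox_client_embeddings` above feeds the prompt tokens to `gpt_neox_eval` in chunks of `n_batch` (default 8); the batch size only changes how many tokens each evaluation call consumes, not the resulting vector. A sketch of what that means at the Ruby level, reusing the hypothetical `client` from the earlier example:

```ruby
text = 'GPT-NeoX is an autoregressive language model.'

# n_batch trades memory per eval call against the number of calls;
# both runs evaluate the same tokens, so the vectors should come out identical.
fast = client.embeddings(text, n_batch: 64)
slow = client.embeddings(text, n_batch: 1)
```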
@@ -290,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new("gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
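The implementation validates its inputs before evaluating: a non-integer `n_batch` or a prompt longer than the model context raises `ArgumentError`, and a failed evaluation raises `RuntimeError`. For example:

```ruby
begin
  client.embeddings('Hello, world.', n_batch: 'eight')
rescue ArgumentError => e
  puts e.message # => "n_batch must be an integer"
end
```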
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
   const int n_threads,
   const int n_past,
   const std::vector<gpt_vocab::id> & embd_inp,
+  std::vector<float> & embd_d,
   std::vector<float> & embd_w,
   size_t & mem_per_token) {
   const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
   //embd_w.resize(n_vocab*N);
   //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+  embd_d.resize(n_embd);
+  struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+  memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
   // return result for just the last token
   embd_w.resize(n_vocab);
   memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
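`gpt_neox_eval` now also copies the last token's `n_embd`-wide activation out of the second-to-last node of the compute graph into `embd_d`, alongside the logits in `embd_w`. The `normalize:` option divides that vector by its L2 norm; the same post-processing can be reproduced in Ruby when the raw vector is requested:

```ruby
raw = client.embeddings('Hello, world.') # normalize: false is the default

# Same arithmetic as the extension's std::inner_product-based L2 normalization.
norm = Math.sqrt(raw.sum { |v| v * v })
unit = raw.map { |v| v / norm }
```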
@@ -3,5 +3,5 @@
 # GPTNeoXClient is a Ruby client for GPT-NeoX.
 class GPTNeoXClient
   # The version of GPTNeoXClient you are using.
-  VERSION = '0.2.0'
+  VERSION = '0.3.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-02 00:00:00.000000000 Z
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: