gpt_neox_client 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d77a992f9cbba2e7a91141c859194cc0a200b9c5dd4e58aeedc51482ec75b8c0
4
- data.tar.gz: 302c37b125a0875463859b62fb7918b47c38cd521ad444fe758a36ba06e27ade
3
+ metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
4
+ data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
5
5
  SHA512:
6
- metadata.gz: a3ab1eb43db87f08e24fb16181d83feeb4b1421fd601165a233cc48eccfd0403de2e1d98042e89e259c843e5c3021e3931d57f4742ee20ae8c7e55cf45f6c0d4
7
- data.tar.gz: 88f961e5a901ea5896486b4612ab3ecc8ebcbad12726fd76700bb1248e31c11716ae4a7248592f7657fb056e36af19f466f95860862a4d165fc026df3d4cb04f
6
+ metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
7
+ data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-09-xx
3
+ ## [0.3.0] - 2023-09-06
4
+
5
+ - Add `embeddings` method.
6
+ ```ruby
7
+ require 'gpt_neox_client'
8
+
9
+ client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
10
+ embd = client.embeddings('Hello, world.', normalize: true)
11
+ ```
12
+
13
+ ## [0.2.0] - 2023-09-02
14
+
15
+ - Add Accelerate framework and Metal build option for macOS.
16
+ - Add OpenBLAS build option for platforms other than macOS.
17
+
18
+ ## [0.1.0] - 2023-09-01
4
19
 
5
20
  - Initial release
data/README.md CHANGED
@@ -38,15 +38,15 @@ japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin
38
38
  ```
39
39
 
40
40
  ```ruby
41
- require "gpt_neox_client"
41
+ require 'gpt_neox_client'
42
42
 
43
- client = GPTNeoXClient.new('japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
43
+ client = GPTNeoXClient.new(path: 'japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
44
44
  puts client.completions(
45
45
  'ユーザー:四国の県名を全て列挙してください。<0x0A>システム:',
46
46
  top_p: 0.9,
47
47
  top_k: 1,
48
48
  temperature: 0.7
49
- ).gsub("<0x0A>", "\n").gsub("</s>", " ")
49
+ ).gsub('<0x0A>', "\n").gsub('</s>', '')
50
50
  #
51
51
  # ユーザー:四国の県名を全て列挙してください。
52
52
  # システム:徳島県、香川県、愛媛県、高知県
@@ -22,4 +22,30 @@ $INCFLAGS << ' -I$(srcdir)/src/ggml'
22
22
  $VPATH << '$(srcdir)/src'
23
23
  $VPATH << '$(srcdir)/src/ggml'
24
24
 
25
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
26
+ $CFLAGS << ' -pthread'
27
+ $CXXFLAGS << ' -pthread'
28
+ end
29
+
30
+ if RUBY_PLATFORM.match?(/darwin/)
31
+ if have_framework('Accelerate')
32
+ $CFLAGS << ' -DGGML_USE_ACCELERATE'
33
+ else
34
+ warning 'Accelerate framework is not found.'
35
+ end
36
+ end
37
+
38
+ $CFLAGS << ' -DGGML_USE_OPENBLAS' if !RUBY_PLATFORM.match?(/darwin/) && (have_library('openblas') && have_header('cblas.h'))
39
+
25
40
  create_makefile('gpt_neox_client/gpt_neox_client')
41
+
42
+ if RUBY_PLATFORM.match?(/darwin/)
43
+ File.open('Makefile', 'a') do |f|
44
+ f.puts "\nggml-metal.o: ggml-metal.m ggml-metal.h"
45
+ f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
46
+ end
47
+
48
+ metal_path = File.expand_path("#{__dir__}/src/ggml/ggml-metal.metal")
49
+ dest_path = File.expand_path("#{__dir__}/../../lib/gpt_neox_client/")
50
+ FileUtils.cp(metal_path, dest_path)
51
+ end
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
196
196
  const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
197
197
 
198
198
  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
199
+ std::vector<float> embedding;
199
200
  std::vector<float> logits;
200
201
  size_t mem_per_token = 0;
201
- gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
202
+ gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
202
203
 
203
204
  int n_past = 0;
204
205
  int n_consumed = 0;
@@ -208,10 +209,11 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
208
209
  std::mt19937 rng(seed);
209
210
  std::vector<gpt_vocab::id> embd;
210
211
  std::vector<int32_t> last_n_tokens(model->hparams.n_ctx, 0);
212
+ gpt_vocab::id token_eos = vocab->token_to_id["</s>"];
211
213
 
212
214
  while (n_sampled < n_predict) {
213
215
  if (embd.size() > 0) {
214
- if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
216
+ if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
215
217
  rb_raise(rb_eRuntimeError, "failed to predict.");
216
218
  return Qnil;
217
219
  }
@@ -240,13 +242,73 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
240
242
  }
241
243
 
242
244
  for (auto id : embd) completions += vocab->id_to_token[id];
243
- if (embd.back() == 0) break;
245
+ if (!embd.empty() && embd.back() == token_eos) break;
244
246
  }
245
247
 
246
248
  RB_GC_GUARD(prompt_);
247
249
  return rb_utf8_str_new_cstr(completions.c_str());
248
250
  }
249
251
 
252
+ static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
253
+ VALUE prompt_ = Qnil;
254
+ VALUE kw_args = Qnil;
255
+ rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
256
+
257
+ ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
258
+ VALUE kw_values[2] = { Qundef, Qundef };
259
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
260
+
261
+ if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
262
+ rb_raise(rb_eArgError, "n_batch must be an integer");
263
+ return Qnil;
264
+ }
265
+
266
+ std::string prompt(StringValueCStr(prompt_));
267
+ const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
268
+ const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
269
+
270
+ gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
271
+ gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
272
+ const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
273
+
274
+ std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
275
+
276
+ if (embd_inp.size() > model->hparams.n_ctx) {
277
+ rb_raise(rb_eArgError, "prompt is too long");
278
+ return Qnil;
279
+ }
280
+
281
+ std::vector<float> embedding;
282
+ std::vector<float> logits;
283
+ size_t mem_per_token = 0;
284
+ gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
285
+
286
+ int n_past = 0;
287
+ std::vector<gpt_vocab::id> embd;
288
+ while (!embd_inp.empty()) {
289
+ const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
290
+ embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
291
+ if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
292
+ rb_raise(rb_eRuntimeError, "failed to predict.");
293
+ return Qnil;
294
+ }
295
+ n_past += n_tokens;
296
+ embd.clear();
297
+ embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
298
+ }
299
+
300
+ if (normalize) {
301
+ const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
302
+ for (auto& v : embedding) v /= norm;
303
+ }
304
+
305
+ VALUE res = rb_ary_new2(embedding.size());
306
+ for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
307
+
308
+ RB_GC_GUARD(prompt_);
309
+ return res;
310
+ }
311
+
250
312
  extern "C" void Init_gpt_neox_client(void) {
251
313
  /**
252
314
  * Document-class: GPTNeoXClient
@@ -289,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
289
351
  * @return [String]
290
352
  */
291
353
  rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
354
+ /**
355
+ * Generates embeddings.
356
+ *
357
+ * @example
358
+ * require "gpt_neox_client"
359
+ *
360
+ * client = GPTNeoXClient.new("gpt-neox-f16.bin")
361
+ * client.embeddings("Hello, my name is")
362
+ *
363
+ * @overload embeddings(text, n_batch: 8, normalize: false)
364
+ * @param [String] text The text.
365
+ * @param [Integer] n_batch The number of tokens to evaluate at once.
366
+ * @param [Boolean] normalize The flag to normalize the embeddings.
367
+ * @return [Array<Float>]
368
+ */
369
+ rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
292
370
  /**
293
371
  * Returns the path to the model.
294
372
  * @return [String]
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
433
433
  const int n_threads,
434
434
  const int n_past,
435
435
  const std::vector<gpt_vocab::id> & embd_inp,
436
+ std::vector<float> & embd_d,
436
437
  std::vector<float> & embd_w,
437
438
  size_t & mem_per_token) {
438
439
  const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
657
658
  //embd_w.resize(n_vocab*N);
658
659
  //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
659
660
 
661
+ embd_d.resize(n_embd);
662
+ struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
663
+ memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
664
+
660
665
  // return result for just the last token
661
666
  embd_w.resize(n_vocab);
662
667
  memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
@@ -3,5 +3,5 @@
3
3
  # GPTNeoXClient is a Ruby client for GPT-NeoX.
4
4
  class GPTNeoXClient
5
5
  # The version of GPTNeoXClient you are using.
6
- VERSION = '0.1.0'
6
+ VERSION = '0.3.0'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gpt_neox_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-01 00:00:00.000000000 Z
11
+ date: 2023-09-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: gpt_neox_client is a simple client for GPT-NeoX.
14
14
  email: