gpt_neox_client 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d77a992f9cbba2e7a91141c859194cc0a200b9c5dd4e58aeedc51482ec75b8c0
4
- data.tar.gz: 302c37b125a0875463859b62fb7918b47c38cd521ad444fe758a36ba06e27ade
3
+ metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
4
+ data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
5
5
  SHA512:
6
- metadata.gz: a3ab1eb43db87f08e24fb16181d83feeb4b1421fd601165a233cc48eccfd0403de2e1d98042e89e259c843e5c3021e3931d57f4742ee20ae8c7e55cf45f6c0d4
7
- data.tar.gz: 88f961e5a901ea5896486b4612ab3ecc8ebcbad12726fd76700bb1248e31c11716ae4a7248592f7657fb056e36af19f466f95860862a4d165fc026df3d4cb04f
6
+ metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
7
+ data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2023-09-xx
3
+ ## [0.3.0] - 2023-09-06
4
+
5
+ - Add `embeddings` method.
6
+ ```ruby
7
+ require 'gpt_neox_client'
8
+
9
+ client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
10
+ embd = client.embeddings('Hello, world.', normalize: true)
11
+ ```
12
+
13
+ ## [0.2.0] - 2023-09-02
14
+
15
+ - Add Accelerate framework and Metal build option for macOS.
16
+ - Add OpenBLAS build option for platforms other than macOS.
17
+
18
+ ## [0.1.0] - 2023-09-01
4
19
 
5
20
  - Initial release
data/README.md CHANGED
@@ -38,15 +38,15 @@ japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin
38
38
  ```
39
39
 
40
40
  ```ruby
41
- require "gpt_neox_client"
41
+ require 'gpt_neox_client'
42
42
 
43
- client = GPTNeoXClient.new('japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
43
+ client = GPTNeoXClient.new(path: 'japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
44
44
  puts client.completions(
45
45
  'ユーザー:四国の県名を全て列挙してください。<0x0A>システム:',
46
46
  top_p: 0.9,
47
47
  top_k: 1,
48
48
  temperature: 0.7
49
- ).gsub("<0x0A>", "\n").gsub("</s>", " ")
49
+ ).gsub('<0x0A>', "\n").gsub('</s>', '')
50
50
  #
51
51
  # ユーザー:四国の県名を全て列挙してください。
52
52
  # システム:徳島県、香川県、愛媛県、高知県
@@ -22,4 +22,30 @@ $INCFLAGS << ' -I$(srcdir)/src/ggml'
22
22
  $VPATH << '$(srcdir)/src'
23
23
  $VPATH << '$(srcdir)/src/ggml'
24
24
 
25
+ if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
26
+ $CFLAGS << ' -pthread'
27
+ $CXXFLAGS << ' -pthread'
28
+ end
29
+
30
+ if RUBY_PLATFORM.match?(/darwin/)
31
+ if have_framework('Accelerate')
32
+ $CFLAGS << ' -DGGML_USE_ACCELERATE'
33
+ else
34
+ warning 'Accelerate framework is not found.'
35
+ end
36
+ end
37
+
38
+ $CFLAGS << ' -DGGML_USE_OPENBLAS' if !RUBY_PLATFORM.match?(/darwin/) && (have_library('openblas') && have_header('cblas.h'))
39
+
25
40
  create_makefile('gpt_neox_client/gpt_neox_client')
41
+
42
+ if RUBY_PLATFORM.match?(/darwin/)
43
+ File.open('Makefile', 'a') do |f|
44
+ f.puts "\nggml-metal.o: ggml-metal.m ggml-metal.h"
45
+ f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
46
+ end
47
+
48
+ metal_path = File.expand_path("#{__dir__}/src/ggml/ggml-metal.metal")
49
+ dest_path = File.expand_path("#{__dir__}/../../lib/gpt_neox_client/")
50
+ FileUtils.cp(metal_path, dest_path)
51
+ end
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
196
196
  const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
197
197
 
198
198
  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
199
+ std::vector<float> embedding;
199
200
  std::vector<float> logits;
200
201
  size_t mem_per_token = 0;
201
- gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
202
+ gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
202
203
 
203
204
  int n_past = 0;
204
205
  int n_consumed = 0;
@@ -208,10 +209,11 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
208
209
  std::mt19937 rng(seed);
209
210
  std::vector<gpt_vocab::id> embd;
210
211
  std::vector<int32_t> last_n_tokens(model->hparams.n_ctx, 0);
212
+ gpt_vocab::id token_eos = vocab->token_to_id["</s>"];
211
213
 
212
214
  while (n_sampled < n_predict) {
213
215
  if (embd.size() > 0) {
214
- if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
216
+ if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
215
217
  rb_raise(rb_eRuntimeError, "failed to predict.");
216
218
  return Qnil;
217
219
  }
@@ -240,13 +242,73 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
240
242
  }
241
243
 
242
244
  for (auto id : embd) completions += vocab->id_to_token[id];
243
- if (embd.back() == 0) break;
245
+ if (!embd.empty() && embd.back() == token_eos) break;
244
246
  }
245
247
 
246
248
  RB_GC_GUARD(prompt_);
247
249
  return rb_utf8_str_new_cstr(completions.c_str());
248
250
  }
249
251
 
252
+ static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
253
+ VALUE prompt_ = Qnil;
254
+ VALUE kw_args = Qnil;
255
+ rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
256
+
257
+ ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
258
+ VALUE kw_values[2] = { Qundef, Qundef };
259
+ rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
260
+
261
+ if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
262
+ rb_raise(rb_eArgError, "n_batch must be an integer");
263
+ return Qnil;
264
+ }
265
+
266
+ std::string prompt(StringValueCStr(prompt_));
267
+ const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
268
+ const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
269
+
270
+ gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
271
+ gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
272
+ const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
273
+
274
+ std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
275
+
276
+ if (embd_inp.size() > model->hparams.n_ctx) {
277
+ rb_raise(rb_eArgError, "prompt is too long");
278
+ return Qnil;
279
+ }
280
+
281
+ std::vector<float> embedding;
282
+ std::vector<float> logits;
283
+ size_t mem_per_token = 0;
284
+ gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
285
+
286
+ int n_past = 0;
287
+ std::vector<gpt_vocab::id> embd;
288
+ while (!embd_inp.empty()) {
289
+ const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
290
+ embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
291
+ if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
292
+ rb_raise(rb_eRuntimeError, "failed to predict.");
293
+ return Qnil;
294
+ }
295
+ n_past += n_tokens;
296
+ embd.clear();
297
+ embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
298
+ }
299
+
300
+ if (normalize) {
301
+ const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
302
+ for (auto& v : embedding) v /= norm;
303
+ }
304
+
305
+ VALUE res = rb_ary_new2(embedding.size());
306
+ for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
307
+
308
+ RB_GC_GUARD(prompt_);
309
+ return res;
310
+ }
311
+
250
312
  extern "C" void Init_gpt_neox_client(void) {
251
313
  /**
252
314
  * Document-class: GPTNeoXClient
@@ -289,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
289
351
  * @return [String]
290
352
  */
291
353
  rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
354
+ /**
355
+ * Generates embeddings.
356
+ *
357
+ * @example
358
+ * require "gpt_neox_client"
359
+ *
360
+ * client = GPTNeoXClient.new("gpt-neox-f16.bin")
361
+ * client.embeddings("Hello, my name is")
362
+ *
363
+ * @overload embeddings(text, n_batch: 8, normalize: false)
364
+ * @param [String] text The text.
365
+ * @param [Integer] n_batch The number of tokens to evaluate at once.
366
+ * @param [Boolean] normalize The flag to normalize the embeddings.
367
+ * @return [Array<Float>]
368
+ */
369
+ rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
292
370
  /**
293
371
  * Returns the path to the model.
294
372
  * @return [String]
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
433
433
  const int n_threads,
434
434
  const int n_past,
435
435
  const std::vector<gpt_vocab::id> & embd_inp,
436
+ std::vector<float> & embd_d,
436
437
  std::vector<float> & embd_w,
437
438
  size_t & mem_per_token) {
438
439
  const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
657
658
  //embd_w.resize(n_vocab*N);
658
659
  //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
659
660
 
661
+ embd_d.resize(n_embd);
662
+ struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
663
+ memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
664
+
660
665
  // return result for just the last token
661
666
  embd_w.resize(n_vocab);
662
667
  memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
@@ -3,5 +3,5 @@
3
3
  # GPTNeoXClient is a Ruby client for GPT-NeoX.
4
4
  class GPTNeoXClient
5
5
  # The version of GPTNeoXClient you are using.
6
- VERSION = '0.1.0'
6
+ VERSION = '0.3.0'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gpt_neox_client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-01 00:00:00.000000000 Z
11
+ date: 2023-09-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: gpt_neox_client is a simple client for GPT-NeoX.
14
14
  email: