gpt_neox_client 0.1.0 → 0.3.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/README.md +3 -3
- data/ext/gpt_neox_client/extconf.rb +26 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +81 -3
- data/ext/gpt_neox_client/src/main.cpp +5 -0
- data/lib/gpt_neox_client/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
 ## [Unreleased]
 
-## [0.1.0] - 2023-09-01
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+```ruby
+require 'gpt_neox_client'
+
+client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+embd = client.embeddings('Hello, world.', normalize: true)
+```
+
+## [0.2.0] - 2023-09-02
+
+- Add Accelerate framework and Metal build option for macOS.
+- Add OpenBLAS build option for platforms other than macOS.
+
+## [0.1.0] - 2023-09-01
 
 - Initial release
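Since `normalize: true` rescales the returned vector to unit length, the dot product of two normalized embeddings is their cosine similarity. A minimal sketch of that use case (not from the gem's docs; the model path is a placeholder):

```ruby
require 'gpt_neox_client'

client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)

# Both vectors have unit length, so the dot product equals cosine similarity.
a = client.embeddings('The weather is sunny today.', normalize: true)
b = client.embeddings('It is a bright, sunny day.', normalize: true)
similarity = a.zip(b).sum { |x, y| x * y }
puts similarity
```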
data/README.md
CHANGED
@@ -38,15 +38,15 @@ japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin
 ```
 
 ```ruby
-require
+require 'gpt_neox_client'
 
-client = GPTNeoXClient.new('japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
+client = GPTNeoXClient.new(path: 'japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
 puts client.completions(
   'ユーザー:四国の県名を全て列挙してください。<0x0A>システム:',
   top_p: 0.9,
   top_k: 1,
   temperature: 0.7
-).gsub(
+).gsub('<0x0A>', "\n").gsub('</s>', '')
 #
 # ユーザー:四国の県名を全て列挙してください。
 # システム:徳島県、香川県、愛媛県、高知県
data/ext/gpt_neox_client/extconf.rb
CHANGED
@@ -22,4 +22,30 @@ $INCFLAGS << ' -I$(srcdir)/src/ggml'
 $VPATH << '$(srcdir)/src'
 $VPATH << '$(srcdir)/src/ggml'
 
+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+if RUBY_PLATFORM.match?(/darwin/)
+  if have_framework('Accelerate')
+    $CFLAGS << ' -DGGML_USE_ACCELERATE'
+  else
+    warning 'Accelerate framework is not found.'
+  end
+end
+
+$CFLAGS << ' -DGGML_USE_OPENBLAS' if !RUBY_PLATFORM.match?(/darwin/) && (have_library('openblas') && have_header('cblas.h'))
+
 create_makefile('gpt_neox_client/gpt_neox_client')
+
+if RUBY_PLATFORM.match?(/darwin/)
+  File.open('Makefile', 'a') do |f|
+    f.puts "\nggml-metal.o: ggml-metal.m ggml-metal.h"
+    f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
+  end
+
+  metal_path = File.expand_path("#{__dir__}/src/ggml/ggml-metal.metal")
+  dest_path = File.expand_path("#{__dir__}/../../lib/gpt_neox_client/")
+  FileUtils.cp(metal_path, dest_path)
+end
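The block after `create_makefile` appends a hand-written rule for `ggml-metal.o` to the generated Makefile and copies the Metal shader next to the Ruby sources, presumably so it can be located at run time. A standalone sketch of the same append pattern (`extra.m`/`extra.h` and the target name are hypothetical):

```ruby
require 'mkmf'

create_makefile('example/example')

# mkmf has already written the Makefile; extra build rules can be appended to it.
File.open('Makefile', 'a') do |f|
  f.puts "\nextra.o: extra.m extra.h"
  f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
end
```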
data/ext/gpt_neox_client/gpt_neox_client.cpp
CHANGED
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -208,10 +209,11 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   std::mt19937 rng(seed);
   std::vector<gpt_vocab::id> embd;
   std::vector<int32_t> last_n_tokens(model->hparams.n_ctx, 0);
+  gpt_vocab::id token_eos = vocab->token_to_id["</s>"];
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
         rb_raise(rb_eRuntimeError, "failed to predict.");
         return Qnil;
       }
@@ -240,13 +242,73 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
     }
 
     for (auto id : embd) completions += vocab->id_to_token[id];
-    if (embd.back() ==
+    if (!embd.empty() && embd.back() == token_eos) break;
   }
 
   RB_GC_GUARD(prompt_);
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
@@ -289,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new(path: "gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
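As `gpt_neox_client_embeddings` above shows, the prompt is tokenized and evaluated in chunks of `n_batch` tokens (default 8), and the embedding of the prompt's last token is returned. A usage sketch under those defaults (the model path is a placeholder):

```ruby
require 'gpt_neox_client'

client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)

# A larger n_batch means fewer gpt_neox_eval calls for a long prompt, at the
# cost of more memory per call; the returned embedding is the same either way.
embd = client.embeddings('Hello, world.', n_batch: 32)
puts embd.size # the model's hidden size (n_embd)
```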
data/ext/gpt_neox_client/src/main.cpp
CHANGED
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
         const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
+        std::vector<float> & embd_d,
         std::vector<float> & embd_w,
         size_t & mem_per_token) {
     const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
     //embd_w.resize(n_vocab*N);
     //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+    embd_d.resize(n_embd);
+    struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+    memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
     // return result for just the last token
     embd_w.resize(n_vocab);
     memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-01 00:00:00.000000000 Z
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: