gpt_neox_client 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/README.md +3 -3
- data/ext/gpt_neox_client/extconf.rb +26 -0
- data/ext/gpt_neox_client/gpt_neox_client.cpp +81 -3
- data/ext/gpt_neox_client/src/main.cpp +5 -0
- data/lib/gpt_neox_client/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7d90a7d8178a4974871638030ee67311c7ec38c169810c0c4b583d4cd1d697a
+  data.tar.gz: 3c85344089c5f1048524b1163cf956c4b723a0c29b771706a3b514c7bc2088aa
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7f0ac814530db33cd077505b093a5ec1fa1cfe715541c5210d81c47ce34e927128422c503a5f6f55474d5200c59e566e7d41c10243518897a98e4535d0588f5d
+  data.tar.gz: efad11b4aebd6b07070ab6d9b043f51832a24c23a8a6adde52167df8e0128339f063d1375564ded5f775e953ef3543b1f1eda20dbf3ab0313a68380396298b6a
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,20 @@
 ## [Unreleased]
 
-## [0.
+## [0.3.0] - 2023-09-06
+
+- Add `embeddings` method.
+```ruby
+require 'gpt_neox_client'
+
+client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', seed: 123456789, n_threads: 8)
+embd = client.embeddings('Hello, world.', normalize: true)
+```
+
+## [0.2.0] - 2023-09-02
+
+- Add Accelerate framework and Metal build option for macOS.
+- Add OpenBLAS build option for platforms other than macOS.
+
+## [0.1.0] - 2023-09-01
 
 - Initial release
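Since `embeddings` returns a plain `Array<Float>` and `normalize: true` yields unit-length vectors, cosine similarity reduces to a dot product. A minimal sketch under those assumptions (the model path and prompts here are hypothetical):

```ruby
require 'gpt_neox_client'

# Hypothetical model path; any GGML-format GPT-NeoX model file works.
client = GPTNeoXClient.new(path: '/path/to/ggml-model-f16.bin', n_threads: 8)

# With normalize: true each embedding has unit length, so the dot
# product of two embeddings equals their cosine similarity.
a = client.embeddings('The weather is nice today.', normalize: true)
b = client.embeddings('It is sunny and pleasant outside.', normalize: true)

similarity = a.zip(b).sum { |x, y| x * y }
puts format('cosine similarity: %.4f', similarity)
```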
data/README.md
CHANGED
@@ -38,15 +38,15 @@ japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin
 ```
 
 ```ruby
-require
+require 'gpt_neox_client'
 
-client = GPTNeoXClient.new('japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
+client = GPTNeoXClient.new(path: 'japanese-large-lm-3.6b-instruction-sft/ggml-model-f16.bin', seed: 123456789, n_threads: 4)
 puts client.completions(
   'ユーザー:四国の県名を全て列挙してください。<0x0A>システム:',
   top_p: 0.9,
   top_k: 1,
   temperature: 0.7
-).gsub(
+).gsub('<0x0A>', "\n").gsub('</s>', '')
 #
 # ユーザー:四国の県名を全て列挙してください。
 # システム:徳島県、香川県、愛媛県、高知県
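Note the breaking change in this hunk: `GPTNeoXClient.new` now takes the model path as a `path:` keyword argument rather than the positional argument used in 0.1.0, so call sites written against older versions need updating.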
data/ext/gpt_neox_client/extconf.rb
CHANGED
@@ -22,4 +22,30 @@ $INCFLAGS << ' -I$(srcdir)/src/ggml'
 $VPATH << '$(srcdir)/src'
 $VPATH << '$(srcdir)/src/ggml'
 
+if RUBY_PLATFORM.match?(/darwin|linux|bsd/) && try_compile('#include <stdio.h>', '-pthread')
+  $CFLAGS << ' -pthread'
+  $CXXFLAGS << ' -pthread'
+end
+
+if RUBY_PLATFORM.match?(/darwin/)
+  if have_framework('Accelerate')
+    $CFLAGS << ' -DGGML_USE_ACCELERATE'
+  else
+    warning 'Accelerate framework is not found.'
+  end
+end
+
+$CFLAGS << ' -DGGML_USE_OPENBLAS' if !RUBY_PLATFORM.match?(/darwin/) && (have_library('openblas') && have_header('cblas.h'))
+
 create_makefile('gpt_neox_client/gpt_neox_client')
+
+if RUBY_PLATFORM.match?(/darwin/)
+  File.open('Makefile', 'a') do |f|
+    f.puts "\nggml-metal.o: ggml-metal.m ggml-metal.h"
+    f.puts "\t$(CC) $(CFLAGS) -c $< -o $@"
+  end
+
+  metal_path = File.expand_path("#{__dir__}/src/ggml/ggml-metal.metal")
+  dest_path = File.expand_path("#{__dir__}/../../lib/gpt_neox_client/")
+  FileUtils.cp(metal_path, dest_path)
+end
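The darwin branch appends a Makefile rule so that `ggml-metal.m` is compiled into the extension, and copies the `ggml-metal.metal` shader next to the installed library under `lib/gpt_neox_client/`, presumably so the Metal backend can locate the shader at runtime.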
data/ext/gpt_neox_client/gpt_neox_client.cpp
CHANGED
@@ -196,9 +196,10 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   const int n_predict = std::min(n_predict_, model->hparams.n_ctx - static_cast<int>(embd_inp.size()));
 
   const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+  std::vector<float> embedding;
   std::vector<float> logits;
   size_t mem_per_token = 0;
-  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
 
   int n_past = 0;
   int n_consumed = 0;
@@ -208,10 +209,11 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
   std::mt19937 rng(seed);
   std::vector<gpt_vocab::id> embd;
   std::vector<int32_t> last_n_tokens(model->hparams.n_ctx, 0);
+  gpt_vocab::id token_eos = vocab->token_to_id["</s>"];
 
   while (n_sampled < n_predict) {
     if (embd.size() > 0) {
-      if (!gpt_neox_eval(*model, n_threads, n_past, embd, logits, mem_per_token)) {
+      if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
         rb_raise(rb_eRuntimeError, "failed to predict.");
         return Qnil;
       }
@@ -240,13 +242,73 @@ static VALUE gpt_neox_client_completions(int argc, VALUE* argv, VALUE self) {
     }
 
     for (auto id : embd) completions += vocab->id_to_token[id];
-    if (embd.back() ==
+    if (!embd.empty() && embd.back() == token_eos) break;
   }
 
   RB_GC_GUARD(prompt_);
   return rb_utf8_str_new_cstr(completions.c_str());
 }
 
+static VALUE gpt_neox_client_embeddings(int argc, VALUE* argv, VALUE self) {
+  VALUE prompt_ = Qnil;
+  VALUE kw_args = Qnil;
+  rb_scan_args(argc, argv, "1:", &prompt_, &kw_args);
+
+  ID kw_table[2] = { rb_intern("n_batch"), rb_intern("normalize") };
+  VALUE kw_values[2] = { Qundef, Qundef };
+  rb_get_kwargs(kw_args, kw_table, 0, 2, kw_values);
+
+  if (kw_values[0] != Qundef && !RB_INTEGER_TYPE_P(kw_values[0])) {
+    rb_raise(rb_eArgError, "n_batch must be an integer");
+    return Qnil;
+  }
+
+  std::string prompt(StringValueCStr(prompt_));
+  const int n_batch = kw_values[0] != Qundef ? NUM2INT(kw_values[0]) : 8;
+  const bool normalize = kw_values[1] != Qundef ? RTEST(kw_values[1]) : false;
+
+  gpt_neox_model* model = RbGPTNeoXModel::get_gpt_neox_model(rb_iv_get(self, "@model"));
+  gpt_vocab* vocab = RbGPTVocab::get_gpt_vocab(rb_iv_get(self, "@vocab"));
+  const int n_threads = NUM2INT(rb_iv_get(self, "@n_threads"));
+
+  std::vector<gpt_vocab::id> embd_inp = gpt_tokenize(*vocab, prompt);
+
+  if (embd_inp.size() > model->hparams.n_ctx) {
+    rb_raise(rb_eArgError, "prompt is too long");
+    return Qnil;
+  }
+
+  std::vector<float> embedding;
+  std::vector<float> logits;
+  size_t mem_per_token = 0;
+  gpt_neox_eval(*model, n_threads, 0, { 0, 1, 2, 3 }, embedding, logits, mem_per_token);
+
+  int n_past = 0;
+  std::vector<gpt_vocab::id> embd;
+  while (!embd_inp.empty()) {
+    const int n_tokens = std::min(n_batch, static_cast<int>(embd_inp.size()));
+    embd.insert(embd.end(), embd_inp.begin(), embd_inp.begin() + n_tokens);
+    if (!gpt_neox_eval(*model, n_threads, n_past, embd, embedding, logits, mem_per_token)) {
+      rb_raise(rb_eRuntimeError, "failed to predict.");
+      return Qnil;
+    }
+    n_past += n_tokens;
+    embd.clear();
+    embd_inp.erase(embd_inp.begin(), embd_inp.begin() + n_tokens);
+  }
+
+  if (normalize) {
+    const float norm = std::sqrt(std::inner_product(embedding.begin(), embedding.end(), embedding.begin(), 0.0f));
+    for (auto& v : embedding) v /= norm;
+  }
+
+  VALUE res = rb_ary_new2(embedding.size());
+  for (size_t i = 0; i < embedding.size(); i++) rb_ary_store(res, i, DBL2NUM(embedding[i]));
+
+  RB_GC_GUARD(prompt_);
+  return res;
+}
+
 extern "C" void Init_gpt_neox_client(void) {
   /**
    * Document-class: GPTNeoXClient
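The new `embeddings` method feeds the prompt to `gpt_neox_eval` in chunks of `n_batch` tokens; each call overwrites `embedding`, so the vector handed back to Ruby is the hidden state computed for the prompt's final token. From the Ruby side the batch size is just a keyword argument (a sketch; `client` is assumed to be constructed as in the README):

```ruby
# Larger n_batch evaluates more prompt tokens per ggml pass, trading
# peak memory for fewer passes; the resulting vector is identical.
embd = client.embeddings('Hello, world.', n_batch: 32)
puts embd.size # => the model's hidden size (n_embd)
```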
@@ -289,6 +351,22 @@ extern "C" void Init_gpt_neox_client(void) {
    * @return [String]
    */
   rb_define_method(rb_cGPTNeoXClient, "completions", RUBY_METHOD_FUNC(gpt_neox_client_completions), -1);
+  /**
+   * Generates embeddings.
+   *
+   * @example
+   *   require "gpt_neox_client"
+   *
+   *   client = GPTNeoXClient.new(path: "gpt-neox-f16.bin")
+   *   client.embeddings("Hello, my name is")
+   *
+   * @overload embeddings(text, n_batch: 8, normalize: false)
+   *   @param [String] text The text.
+   *   @param [Integer] n_batch The number of tokens to evaluate at once.
+   *   @param [Boolean] normalize The flag to normalize the embeddings.
+   *   @return [Array<Float>]
+   */
+  rb_define_method(rb_cGPTNeoXClient, "embeddings", RUBY_METHOD_FUNC(gpt_neox_client_embeddings), -1);
   /**
    * Returns the path to the model.
    * @return [String]
data/ext/gpt_neox_client/src/main.cpp
CHANGED
@@ -433,6 +433,7 @@ bool gpt_neox_eval(
     const int n_threads,
     const int n_past,
     const std::vector<gpt_vocab::id> & embd_inp,
+    std::vector<float> & embd_d,
     std::vector<float> & embd_w,
     size_t & mem_per_token) {
   const int N = embd_inp.size();
@@ -657,6 +658,10 @@ bool gpt_neox_eval(
   //embd_w.resize(n_vocab*N);
   //memcpy(embd_w.data(), ggml_get_data(inpL), sizeof(float)*n_vocab*N);
 
+  embd_d.resize(n_embd);
+  struct ggml_tensor* embeddings = gf.nodes[gf.n_nodes - 2];
+  memcpy(embd_d.data(), (float*)ggml_get_data(embeddings) + (n_embd * (N - 1)), sizeof(float)*n_embd);
+
   // return result for just the last token
   embd_w.resize(n_vocab);
   memcpy(embd_w.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
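Here `gf.nodes[gf.n_nodes - 2]` appears to pick the output of the final layer norm, i.e. the last hidden state just before the LM-head matmul that produces the logits; only the `n_embd` values belonging to the last token (offset `n_embd * (N - 1)`) are copied into `embd_d`.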
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gpt_neox_client
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.3.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-
+date: 2023-09-06 00:00:00.000000000 Z
 dependencies: []
 description: gpt_neox_client is a simple client for GPT-NeoX.
 email: