youtokentome 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/youtokentome/ext.cpp +16 -17
- data/ext/youtokentome/extconf.rb +1 -1
- data/lib/youtokentome/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dc70dec1b03c48e827b1904a17bcb7edfaf7d97b9d21a83789ca8e40e75cbede
|
|
4
|
+
data.tar.gz: 00c7f498eda14f3ac274c9c6207a697ecfe54e88adc834ebe9c256318ae42c00
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f604358334bf73b7ce0b7a8fd3cc13988d5af01a7c04ba7a96ddd22d97eb5b17c79f8060969e9ef3dd53a7bd59b249f338488ef297559d5d2e3045541615fb4b
|
|
7
|
+
data.tar.gz: 208dedfe2bc7ff170fc6a8abf730dd19123e890e795ac62b4b0645716799392baf99e8fc0e50cf225f44770a7f41a8d73f87a0da151fe265345ac6764d92fd87
|
data/CHANGELOG.md
CHANGED
data/ext/youtokentome/ext.cpp
CHANGED
|
@@ -6,12 +6,7 @@
|
|
|
6
6
|
#include <rice/rice.hpp>
|
|
7
7
|
#include <rice/stl.hpp>
|
|
8
8
|
|
|
9
|
-
using Rice::define_class_under;
|
|
10
|
-
using Rice::define_module;
|
|
11
|
-
using Rice::define_module_under;
|
|
12
9
|
using Rice::Array;
|
|
13
|
-
using Rice::Module;
|
|
14
|
-
using Rice::Object;
|
|
15
10
|
|
|
16
11
|
void check_status(vkcom::Status& status) {
|
|
17
12
|
if (!status.ok()) {
|
|
@@ -41,10 +36,11 @@ namespace Rice::detail
|
|
|
41
36
|
public:
|
|
42
37
|
std::vector<int> convert(VALUE x)
|
|
43
38
|
{
|
|
44
|
-
|
|
39
|
+
auto a = Array(x);
|
|
45
40
|
std::vector<int> ret;
|
|
46
|
-
|
|
47
|
-
|
|
41
|
+
ret.reserve(a.size());
|
|
42
|
+
for (const auto& v : a) {
|
|
43
|
+
ret.push_back(From_Ruby<int>().convert(v.value()));
|
|
48
44
|
}
|
|
49
45
|
return ret;
|
|
50
46
|
}
|
|
@@ -56,22 +52,25 @@ namespace Rice::detail
|
|
|
56
52
|
public:
|
|
57
53
|
std::vector<std::string> convert(VALUE x)
|
|
58
54
|
{
|
|
59
|
-
|
|
55
|
+
auto a = Array(x);
|
|
60
56
|
std::vector<std::string> ret;
|
|
61
|
-
|
|
62
|
-
|
|
57
|
+
ret.reserve(a.size());
|
|
58
|
+
for (const auto& v : a) {
|
|
59
|
+
ret.push_back(From_Ruby<std::string>().convert(v.value()));
|
|
63
60
|
}
|
|
64
61
|
return ret;
|
|
65
62
|
}
|
|
66
63
|
};
|
|
67
64
|
}
|
|
68
65
|
|
|
69
|
-
extern "C"
|
|
70
|
-
|
|
71
|
-
|
|
66
|
+
extern "C"
|
|
67
|
+
void Init_ext()
|
|
68
|
+
{
|
|
69
|
+
auto rb_mYouTokenToMe = Rice::define_module("YouTokenToMe");
|
|
70
|
+
auto rb_mExt = Rice::define_module_under(rb_mYouTokenToMe, "Ext")
|
|
72
71
|
.define_singleton_function(
|
|
73
72
|
"train_bpe",
|
|
74
|
-
[](const std::string
|
|
73
|
+
[](const std::string& input_path, const std::string& model_path, int vocab_size, double coverage,
|
|
75
74
|
int n_threads, int pad_id, int unk_id, int bos_id, int eos_id) {
|
|
76
75
|
|
|
77
76
|
vkcom::SpecialTokens special_tokens(pad_id, unk_id, bos_id, eos_id);
|
|
@@ -80,7 +79,7 @@ extern "C" void Init_ext() {
|
|
|
80
79
|
check_status(status);
|
|
81
80
|
});
|
|
82
81
|
|
|
83
|
-
define_class_under<vkcom::BaseEncoder>(rb_mExt, "BaseEncoder")
|
|
82
|
+
Rice::define_class_under<vkcom::BaseEncoder>(rb_mExt, "BaseEncoder")
|
|
84
83
|
.define_method("vocab_size", &vkcom::BaseEncoder::vocab_size)
|
|
85
84
|
.define_method("subword_to_id", &vkcom::BaseEncoder::subword_to_id)
|
|
86
85
|
.define_method(
|
|
@@ -140,7 +139,7 @@ extern "C" void Init_ext() {
|
|
|
140
139
|
.define_method("vocab", &vkcom::BaseEncoder::vocabulary)
|
|
141
140
|
.define_singleton_function(
|
|
142
141
|
"new",
|
|
143
|
-
[](std::string
|
|
142
|
+
[](const std::string& model_path, int n_threads) {
|
|
144
143
|
auto status = vkcom::Status();
|
|
145
144
|
vkcom::BaseEncoder encoder(model_path, n_threads, &status);
|
|
146
145
|
check_status(status);
|
data/ext/youtokentome/extconf.rb
CHANGED
data/lib/youtokentome/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: youtokentome
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-05-
|
|
11
|
+
date: 2021-05-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|