fasttext 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +3 -3
- data/ext/fasttext/ext.cpp +36 -37
- data/lib/fasttext/version.rb +1 -1
- metadata +5 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8e2e013c33aa194ca0b996c1dfab98452d15f6ce909e14d69b6a60c3911ab077
|
|
4
|
+
data.tar.gz: ff859b72fa2150f3158bf1317780ffaeb3e7dbbe3da55313b634ae3d4a5dbd5d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b1eb41b2bad524ca07e564c62c9d104ffec5730cd7b19a200df793ec3a359538cf2f83eefecfec89b019c0101cc48e4dbc50ea4fc9921af1841e8c4fd2b0cec5
|
|
7
|
+
data.tar.gz: e45d97a8d015eca12e23c83fcd1aa5890c55ea09b6f741285a15bb54e0053a6d2f1aa28d859e977ea81ab081b1918885e310370a227d519338225dd657897674
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -167,8 +167,8 @@ FastText::Classifier.new(
|
|
|
167
167
|
dim: 100, # size of word vectors
|
|
168
168
|
ws: 5, # size of the context window
|
|
169
169
|
epoch: 5, # number of epochs
|
|
170
|
-
min_count: 1, # minimal number of word
|
|
171
|
-
min_count_label: 1, # minimal number of label
|
|
170
|
+
min_count: 1, # minimal number of word occurrences
|
|
171
|
+
min_count_label: 1, # minimal number of label occurrences
|
|
172
172
|
minn: 0, # min length of char ngram
|
|
173
173
|
maxn: 0, # max length of char ngram
|
|
174
174
|
neg: 5, # number of negatives sampled
|
|
@@ -197,7 +197,7 @@ FastText::Vectorizer.new(
|
|
|
197
197
|
dim: 100, # size of word vectors
|
|
198
198
|
ws: 5, # size of the context window
|
|
199
199
|
epoch: 5, # number of epochs
|
|
200
|
-
min_count: 5, # minimal number of word
|
|
200
|
+
min_count: 5, # minimal number of word occurrences
|
|
201
201
|
minn: 3, # min length of char ngram
|
|
202
202
|
maxn: 6, # max length of char ngram
|
|
203
203
|
neg: 5, # number of negatives sampled
|
data/ext/fasttext/ext.cpp
CHANGED
|
@@ -2,8 +2,11 @@
|
|
|
2
2
|
#include <cmath>
|
|
3
3
|
#include <cstdint>
|
|
4
4
|
#include <iterator>
|
|
5
|
+
#include <memory>
|
|
5
6
|
#include <sstream>
|
|
6
7
|
#include <stdexcept>
|
|
8
|
+
#include <string>
|
|
9
|
+
#include <vector>
|
|
7
10
|
|
|
8
11
|
// fasttext
|
|
9
12
|
#include <args.h>
|
|
@@ -21,40 +24,36 @@ using fasttext::Args;
|
|
|
21
24
|
using fasttext::FastText;
|
|
22
25
|
|
|
23
26
|
using Rice::Array;
|
|
24
|
-
using Rice::Constructor;
|
|
25
|
-
using Rice::Module;
|
|
26
|
-
using Rice::define_class_under;
|
|
27
|
-
using Rice::define_module;
|
|
28
|
-
using Rice::define_module_under;
|
|
29
27
|
|
|
30
|
-
namespace Rice::detail
|
|
31
|
-
{
|
|
28
|
+
namespace Rice::detail {
|
|
32
29
|
template<>
|
|
33
|
-
class To_Ruby<std::vector<std::pair<fasttext::real, std::string>>>
|
|
34
|
-
{
|
|
30
|
+
class To_Ruby<std::vector<std::pair<fasttext::real, std::string>>> {
|
|
35
31
|
public:
|
|
36
|
-
|
|
37
|
-
|
|
32
|
+
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
33
|
+
|
|
34
|
+
VALUE convert(std::vector<std::pair<fasttext::real, std::string>> const & x) {
|
|
38
35
|
Array ret;
|
|
39
36
|
for (const auto& v : x) {
|
|
40
37
|
Array a;
|
|
41
|
-
a.push(v.first);
|
|
42
|
-
a.push(v.second);
|
|
43
|
-
ret.push(a);
|
|
38
|
+
a.push(v.first, false);
|
|
39
|
+
a.push(v.second, false);
|
|
40
|
+
ret.push(a, false);
|
|
44
41
|
}
|
|
45
42
|
return ret;
|
|
46
43
|
}
|
|
44
|
+
|
|
45
|
+
private:
|
|
46
|
+
Arg* arg_ = nullptr;
|
|
47
47
|
};
|
|
48
|
-
}
|
|
48
|
+
} // namespace Rice::detail
|
|
49
49
|
|
|
50
50
|
extern "C"
|
|
51
|
-
void Init_ext()
|
|
52
|
-
|
|
53
|
-
Module rb_mFastText = define_module("FastText");
|
|
54
|
-
Module rb_mExt = define_module_under(rb_mFastText, "Ext");
|
|
51
|
+
void Init_ext() {
|
|
52
|
+
Rice::Module rb_mFastText = Rice::define_module("FastText");
|
|
53
|
+
Rice::Module rb_mExt = Rice::define_module_under(rb_mFastText, "Ext");
|
|
55
54
|
|
|
56
|
-
define_class_under<Args>(rb_mExt, "Args")
|
|
57
|
-
.define_constructor(Constructor<Args>())
|
|
55
|
+
Rice::define_class_under<Args>(rb_mExt, "Args")
|
|
56
|
+
.define_constructor(Rice::Constructor<Args>())
|
|
58
57
|
.define_attr("input", &Args::input)
|
|
59
58
|
.define_attr("output", &Args::output)
|
|
60
59
|
.define_attr("lr", &Args::lr)
|
|
@@ -110,8 +109,8 @@ void Init_ext()
|
|
|
110
109
|
.define_attr("autotune_duration", &Args::autotuneDuration)
|
|
111
110
|
.define_attr("autotune_model_size", &Args::autotuneModelSize);
|
|
112
111
|
|
|
113
|
-
define_class_under<FastText>(rb_mExt, "Model")
|
|
114
|
-
.define_constructor(Constructor<FastText>())
|
|
112
|
+
Rice::define_class_under<FastText>(rb_mExt, "Model")
|
|
113
|
+
.define_constructor(Rice::Constructor<FastText>())
|
|
115
114
|
.define_method(
|
|
116
115
|
"words",
|
|
117
116
|
[](FastText& m) {
|
|
@@ -121,13 +120,13 @@ void Init_ext()
|
|
|
121
120
|
Array vocab_list;
|
|
122
121
|
Array vocab_freq;
|
|
123
122
|
for (int32_t i = 0; i < d->nwords(); i++) {
|
|
124
|
-
vocab_list.push(d->getWord(i));
|
|
125
|
-
vocab_freq.push(freq[i]);
|
|
123
|
+
vocab_list.push(d->getWord(i), false);
|
|
124
|
+
vocab_freq.push(freq[i], false);
|
|
126
125
|
}
|
|
127
126
|
|
|
128
127
|
Array ret;
|
|
129
|
-
ret.push(vocab_list);
|
|
130
|
-
ret.push(vocab_freq);
|
|
128
|
+
ret.push(vocab_list, false);
|
|
129
|
+
ret.push(vocab_freq, false);
|
|
131
130
|
return ret;
|
|
132
131
|
})
|
|
133
132
|
.define_method(
|
|
@@ -139,13 +138,13 @@ void Init_ext()
|
|
|
139
138
|
Array vocab_list;
|
|
140
139
|
Array vocab_freq;
|
|
141
140
|
for (int32_t i = 0; i < d->nlabels(); i++) {
|
|
142
|
-
vocab_list.push(d->getLabel(i));
|
|
143
|
-
vocab_freq.push(freq[i]);
|
|
141
|
+
vocab_list.push(d->getLabel(i), false);
|
|
142
|
+
vocab_freq.push(freq[i], false);
|
|
144
143
|
}
|
|
145
144
|
|
|
146
145
|
Array ret;
|
|
147
|
-
ret.push(vocab_list);
|
|
148
|
-
ret.push(vocab_freq);
|
|
146
|
+
ret.push(vocab_list, false);
|
|
147
|
+
ret.push(vocab_freq, false);
|
|
149
148
|
return ret;
|
|
150
149
|
})
|
|
151
150
|
.define_method(
|
|
@@ -160,9 +159,9 @@ void Init_ext()
|
|
|
160
159
|
ifs.close();
|
|
161
160
|
|
|
162
161
|
Array ret;
|
|
163
|
-
ret.push(meter.nexamples());
|
|
164
|
-
ret.push(meter.precision());
|
|
165
|
-
ret.push(meter.recall());
|
|
162
|
+
ret.push(meter.nexamples(), false);
|
|
163
|
+
ret.push(meter.precision(), false);
|
|
164
|
+
ret.push(meter.recall(), false);
|
|
166
165
|
return ret;
|
|
167
166
|
})
|
|
168
167
|
.define_method(
|
|
@@ -202,7 +201,7 @@ void Init_ext()
|
|
|
202
201
|
m.getWordVector(vec, word);
|
|
203
202
|
Array ret;
|
|
204
203
|
for (size_t i = 0; i < vec.size(); i++) {
|
|
205
|
-
ret.push(vec[i]);
|
|
204
|
+
ret.push(vec[i], false);
|
|
206
205
|
}
|
|
207
206
|
return ret;
|
|
208
207
|
})
|
|
@@ -216,7 +215,7 @@ void Init_ext()
|
|
|
216
215
|
|
|
217
216
|
Array ret;
|
|
218
217
|
for (const auto& subword : subwords) {
|
|
219
|
-
ret.push(subword);
|
|
218
|
+
ret.push(subword, false);
|
|
220
219
|
}
|
|
221
220
|
return ret;
|
|
222
221
|
})
|
|
@@ -229,7 +228,7 @@ void Init_ext()
|
|
|
229
228
|
m.getSentenceVector(in, vec);
|
|
230
229
|
Array ret;
|
|
231
230
|
for (size_t i = 0; i < vec.size(); i++) {
|
|
232
|
-
ret.push(vec[i]);
|
|
231
|
+
ret.push(vec[i], false);
|
|
233
232
|
}
|
|
234
233
|
return ret;
|
|
235
234
|
})
|
data/lib/fasttext/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fasttext
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rice
|
|
@@ -16,15 +15,14 @@ dependencies:
|
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: 4.
|
|
18
|
+
version: '4.7'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: 4.
|
|
27
|
-
description:
|
|
25
|
+
version: '4.7'
|
|
28
26
|
email: andrew@ankane.org
|
|
29
27
|
executables: []
|
|
30
28
|
extensions:
|
|
@@ -75,7 +73,6 @@ homepage: https://github.com/ankane/fastText-ruby
|
|
|
75
73
|
licenses:
|
|
76
74
|
- MIT
|
|
77
75
|
metadata: {}
|
|
78
|
-
post_install_message:
|
|
79
76
|
rdoc_options: []
|
|
80
77
|
require_paths:
|
|
81
78
|
- lib
|
|
@@ -90,8 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
90
87
|
- !ruby/object:Gem::Version
|
|
91
88
|
version: '0'
|
|
92
89
|
requirements: []
|
|
93
|
-
rubygems_version: 3.
|
|
94
|
-
signing_key:
|
|
90
|
+
rubygems_version: 3.6.9
|
|
95
91
|
specification_version: 4
|
|
96
92
|
summary: Efficient text classification and representation learning for Ruby
|
|
97
93
|
test_files: []
|