fasttext 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/ext/fasttext/ext.cpp +38 -35
- data/lib/fasttext/version.rb +1 -1
- data/lib/fasttext.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9978bfe50053f76326bfd7f97b7acb6ff7af67ca4acba77f21de5000175428b1
|
|
4
|
+
data.tar.gz: 7488a7dc79f8d2e62636468f18e916b8bfbccec59897909a1f666a3ff02a1d86
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ffd2b8f15be1fdbbd0a0b10dfe94c482645c2961ca119af160c8a9952bb95380c5512fb2de85b9885c773f3393fbcc776efa2dcaf94dd390c459470974a07ef7
|
|
7
|
+
data.tar.gz: 831fa02b20d7b594dc3ceefe9b33499df37d75436ea724369b3dc976d950b72c533ae400a1d4c26838381746c94c227bccb50f9ca341d5575e87dd88e2908e41
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
3
|
Copyright (c) 2016-present, Facebook, Inc.
|
|
4
|
-
Copyright (c) 2019-
|
|
4
|
+
Copyright (c) 2019-2025 Andrew Kane
|
|
5
5
|
|
|
6
6
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
7
|
of this software and associated documentation files (the "Software"), to deal
|
data/ext/fasttext/ext.cpp
CHANGED
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
// stdlib
|
|
2
2
|
#include <cmath>
|
|
3
3
|
#include <cstdint>
|
|
4
|
-
#include <
|
|
4
|
+
#include <fstream>
|
|
5
5
|
#include <memory>
|
|
6
6
|
#include <sstream>
|
|
7
7
|
#include <stdexcept>
|
|
8
8
|
#include <string>
|
|
9
|
+
#include <utility>
|
|
9
10
|
#include <vector>
|
|
10
11
|
|
|
11
12
|
// fasttext
|
|
12
13
|
#include <args.h>
|
|
13
14
|
#include <autotune.h>
|
|
14
15
|
#include <densematrix.h>
|
|
16
|
+
#include <dictionary.h>
|
|
15
17
|
#include <fasttext.h>
|
|
18
|
+
#include <meter.h>
|
|
16
19
|
#include <real.h>
|
|
17
20
|
#include <vector.h>
|
|
18
21
|
|
|
@@ -23,21 +26,19 @@
|
|
|
23
26
|
using fasttext::Args;
|
|
24
27
|
using fasttext::FastText;
|
|
25
28
|
|
|
26
|
-
using Rice::Array;
|
|
27
|
-
|
|
28
29
|
namespace Rice::detail {
|
|
29
30
|
template<>
|
|
30
31
|
class To_Ruby<std::vector<std::pair<fasttext::real, std::string>>> {
|
|
31
32
|
public:
|
|
32
33
|
explicit To_Ruby(Arg* arg) : arg_(arg) { }
|
|
33
34
|
|
|
34
|
-
VALUE convert(std::vector<std::pair<fasttext::real, std::string
|
|
35
|
-
|
|
35
|
+
VALUE convert(const std::vector<std::pair<fasttext::real, std::string>>& x) {
|
|
36
|
+
VALUE ret = detail::protect(rb_ary_new2, x.size());
|
|
36
37
|
for (const auto& v : x) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
a
|
|
40
|
-
|
|
38
|
+
VALUE p1 = To_Ruby<fasttext::real>().convert(v.first);
|
|
39
|
+
VALUE p2 = To_Ruby<std::string>().convert(v.second);
|
|
40
|
+
VALUE a = detail::protect(rb_ary_new3, 2, p1, p2);
|
|
41
|
+
detail::protect(rb_ary_push, ret, a);
|
|
41
42
|
}
|
|
42
43
|
return ret;
|
|
43
44
|
}
|
|
@@ -117,14 +118,14 @@ void Init_ext() {
|
|
|
117
118
|
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
|
118
119
|
std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::word);
|
|
119
120
|
|
|
120
|
-
Array vocab_list;
|
|
121
|
-
Array vocab_freq;
|
|
121
|
+
Rice::Array vocab_list;
|
|
122
|
+
Rice::Array vocab_freq;
|
|
122
123
|
for (int32_t i = 0; i < d->nwords(); i++) {
|
|
123
124
|
vocab_list.push(d->getWord(i), false);
|
|
124
|
-
vocab_freq.push(freq
|
|
125
|
+
vocab_freq.push(freq.at(i), false);
|
|
125
126
|
}
|
|
126
127
|
|
|
127
|
-
Array ret;
|
|
128
|
+
Rice::Array ret;
|
|
128
129
|
ret.push(vocab_list, false);
|
|
129
130
|
ret.push(vocab_freq, false);
|
|
130
131
|
return ret;
|
|
@@ -135,14 +136,14 @@ void Init_ext() {
|
|
|
135
136
|
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
|
136
137
|
std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::label);
|
|
137
138
|
|
|
138
|
-
Array vocab_list;
|
|
139
|
-
Array vocab_freq;
|
|
139
|
+
Rice::Array vocab_list;
|
|
140
|
+
Rice::Array vocab_freq;
|
|
140
141
|
for (int32_t i = 0; i < d->nlabels(); i++) {
|
|
141
142
|
vocab_list.push(d->getLabel(i), false);
|
|
142
|
-
vocab_freq.push(freq
|
|
143
|
+
vocab_freq.push(freq.at(i), false);
|
|
143
144
|
}
|
|
144
145
|
|
|
145
|
-
Array ret;
|
|
146
|
+
Rice::Array ret;
|
|
146
147
|
ret.push(vocab_list, false);
|
|
147
148
|
ret.push(vocab_freq, false);
|
|
148
149
|
return ret;
|
|
@@ -150,7 +151,7 @@ void Init_ext() {
|
|
|
150
151
|
.define_method(
|
|
151
152
|
"test",
|
|
152
153
|
[](FastText& m, const std::string& filename, int32_t k) {
|
|
153
|
-
std::ifstream ifs
|
|
154
|
+
std::ifstream ifs{filename};
|
|
154
155
|
if (!ifs.is_open()) {
|
|
155
156
|
throw std::invalid_argument("Test file cannot be opened!");
|
|
156
157
|
}
|
|
@@ -158,7 +159,7 @@ void Init_ext() {
|
|
|
158
159
|
m.test(ifs, k, 0.0, meter);
|
|
159
160
|
ifs.close();
|
|
160
161
|
|
|
161
|
-
Array ret;
|
|
162
|
+
Rice::Array ret;
|
|
162
163
|
ret.push(meter.nexamples(), false);
|
|
163
164
|
ret.push(meter.precision(), false);
|
|
164
165
|
ret.push(meter.recall(), false);
|
|
@@ -166,13 +167,13 @@ void Init_ext() {
|
|
|
166
167
|
})
|
|
167
168
|
.define_method(
|
|
168
169
|
"load_model",
|
|
169
|
-
[](FastText& m, const std::string&
|
|
170
|
-
m.loadModel(
|
|
170
|
+
[](FastText& m, const std::string& filename) {
|
|
171
|
+
m.loadModel(filename);
|
|
171
172
|
})
|
|
172
173
|
.define_method(
|
|
173
174
|
"save_model",
|
|
174
|
-
[](FastText& m, const std::string&
|
|
175
|
-
m.saveModel(
|
|
175
|
+
[](FastText& m, const std::string& filename) {
|
|
176
|
+
m.saveModel(filename);
|
|
176
177
|
})
|
|
177
178
|
.define_method("dimension", &FastText::getDimension)
|
|
178
179
|
.define_method("quantized?", &FastText::isQuant)
|
|
@@ -181,7 +182,7 @@ void Init_ext() {
|
|
|
181
182
|
.define_method(
|
|
182
183
|
"predict",
|
|
183
184
|
[](FastText& m, const std::string& text, int32_t k, float threshold) {
|
|
184
|
-
std::stringstream ioss
|
|
185
|
+
std::stringstream ioss{text};
|
|
185
186
|
std::vector<std::pair<fasttext::real, std::string>> predictions;
|
|
186
187
|
m.predictLine(ioss, predictions, k, threshold);
|
|
187
188
|
return predictions;
|
|
@@ -196,11 +197,12 @@ void Init_ext() {
|
|
|
196
197
|
.define_method(
|
|
197
198
|
"word_vector",
|
|
198
199
|
[](FastText& m, const std::string& word) {
|
|
199
|
-
|
|
200
|
-
fasttext::Vector vec
|
|
200
|
+
int dimension = m.getDimension();
|
|
201
|
+
fasttext::Vector vec{dimension};
|
|
201
202
|
m.getWordVector(vec, word);
|
|
202
|
-
Array ret;
|
|
203
|
-
|
|
203
|
+
Rice::Array ret;
|
|
204
|
+
// fasttext::Vector uses int64_t for size and indexing
|
|
205
|
+
for (int64_t i = 0; i < vec.size(); i++) {
|
|
204
206
|
ret.push(vec[i], false);
|
|
205
207
|
}
|
|
206
208
|
return ret;
|
|
@@ -213,7 +215,7 @@ void Init_ext() {
|
|
|
213
215
|
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
|
214
216
|
d->getSubwords(word, ngrams, subwords);
|
|
215
217
|
|
|
216
|
-
Array ret;
|
|
218
|
+
Rice::Array ret;
|
|
217
219
|
for (const auto& subword : subwords) {
|
|
218
220
|
ret.push(subword, false);
|
|
219
221
|
}
|
|
@@ -222,12 +224,13 @@ void Init_ext() {
|
|
|
222
224
|
.define_method(
|
|
223
225
|
"sentence_vector",
|
|
224
226
|
[](FastText& m, const std::string& text) {
|
|
225
|
-
std::istringstream in
|
|
226
|
-
|
|
227
|
-
fasttext::Vector vec
|
|
227
|
+
std::istringstream in{text};
|
|
228
|
+
int dimension = m.getDimension();
|
|
229
|
+
fasttext::Vector vec{dimension};
|
|
228
230
|
m.getSentenceVector(in, vec);
|
|
229
|
-
Array ret;
|
|
230
|
-
|
|
231
|
+
Rice::Array ret;
|
|
232
|
+
// fasttext::Vector uses int64_t for size and indexing
|
|
233
|
+
for (int64_t i = 0; i < vec.size(); i++) {
|
|
231
234
|
ret.push(vec[i], false);
|
|
232
235
|
}
|
|
233
236
|
return ret;
|
|
@@ -236,7 +239,7 @@ void Init_ext() {
|
|
|
236
239
|
"train",
|
|
237
240
|
[](FastText& m, Args& a) {
|
|
238
241
|
if (a.hasAutotune()) {
|
|
239
|
-
fasttext::Autotune autotune
|
|
242
|
+
fasttext::Autotune autotune{std::shared_ptr<fasttext::FastText>(&m, [](fasttext::FastText*) {})};
|
|
240
243
|
autotune.train(a);
|
|
241
244
|
} else {
|
|
242
245
|
m.train(a);
|
data/lib/fasttext/version.rb
CHANGED
data/lib/fasttext.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fasttext
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.1
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
@@ -80,14 +80,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
80
80
|
requirements:
|
|
81
81
|
- - ">="
|
|
82
82
|
- !ruby/object:Gem::Version
|
|
83
|
-
version: '3.
|
|
83
|
+
version: '3.3'
|
|
84
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
85
|
requirements:
|
|
86
86
|
- - ">="
|
|
87
87
|
- !ruby/object:Gem::Version
|
|
88
88
|
version: '0'
|
|
89
89
|
requirements: []
|
|
90
|
-
rubygems_version:
|
|
90
|
+
rubygems_version: 4.0.6
|
|
91
91
|
specification_version: 4
|
|
92
92
|
summary: Efficient text classification and representation learning for Ruby
|
|
93
93
|
test_files: []
|