fasttext 0.4.0 → 0.5.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 374cacde6bb0c017d6d787dd41180d5de774929a34cad2e356546e02c89345b6
4
- data.tar.gz: b4c767a2d263053148a2f02537fd2eff0883840cf8da5e4c5f437c5fcaca55da
3
+ metadata.gz: 9978bfe50053f76326bfd7f97b7acb6ff7af67ca4acba77f21de5000175428b1
4
+ data.tar.gz: 7488a7dc79f8d2e62636468f18e916b8bfbccec59897909a1f666a3ff02a1d86
5
5
  SHA512:
6
- metadata.gz: 65dceed43c0cb7f84779b85b8387bdd67ab43b779fff5577e4bb13202ddee6a0d27fbdcab5f82af40fc070925c6e30ce73090c7db7d21da48a79810011235fd5
7
- data.tar.gz: c565ef3d2c8132e84ec84a2d33b04caf6c8dead8928287966db1c34814673df3ca5a3d8372e3994478fd364f95c806a2515fa7f2106bef30deaf9e89f19a938a
6
+ metadata.gz: ffd2b8f15be1fdbbd0a0b10dfe94c482645c2961ca119af160c8a9952bb95380c5512fb2de85b9885c773f3393fbcc776efa2dcaf94dd390c459470974a07ef7
7
+ data.tar.gz: 831fa02b20d7b594dc3ceefe9b33499df37d75436ea724369b3dc976d950b72c533ae400a1d4c26838381746c94c227bccb50f9ca341d5575e87dd88e2908e41
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.5.0 (2026-04-09)
2
+
3
+ - Dropped support for Ruby < 3.3
4
+
5
+ ## 0.4.1 (2025-10-26)
6
+
7
+ - Fixed error with Rice 4.7
8
+
1
9
  ## 0.4.0 (2024-10-22)
2
10
 
3
11
  - Dropped support for Ruby < 3.1
data/LICENSE.txt CHANGED
@@ -1,7 +1,7 @@
1
1
  MIT License
2
2
 
3
3
  Copyright (c) 2016-present, Facebook, Inc.
4
- Copyright (c) 2019-2024 Andrew Kane
4
+ Copyright (c) 2019-2025 Andrew Kane
5
5
 
6
6
  Permission is hereby granted, free of charge, to any person obtaining a copy
7
7
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -167,8 +167,8 @@ FastText::Classifier.new(
167
167
  dim: 100, # size of word vectors
168
168
  ws: 5, # size of the context window
169
169
  epoch: 5, # number of epochs
170
- min_count: 1, # minimal number of word occurences
171
- min_count_label: 1, # minimal number of label occurences
170
+ min_count: 1, # minimal number of word occurrences
171
+ min_count_label: 1, # minimal number of label occurrences
172
172
  minn: 0, # min length of char ngram
173
173
  maxn: 0, # max length of char ngram
174
174
  neg: 5, # number of negatives sampled
@@ -197,7 +197,7 @@ FastText::Vectorizer.new(
197
197
  dim: 100, # size of word vectors
198
198
  ws: 5, # size of the context window
199
199
  epoch: 5, # number of epochs
200
- min_count: 5, # minimal number of word occurences
200
+ min_count: 5, # minimal number of word occurrences
201
201
  minn: 3, # min length of char ngram
202
202
  maxn: 6, # max length of char ngram
203
203
  neg: 5, # number of negatives sampled
data/ext/fasttext/ext.cpp CHANGED
@@ -1,15 +1,21 @@
1
1
  // stdlib
2
2
  #include <cmath>
3
3
  #include <cstdint>
4
- #include <iterator>
4
+ #include <fstream>
5
+ #include <memory>
5
6
  #include <sstream>
6
7
  #include <stdexcept>
8
+ #include <string>
9
+ #include <utility>
10
+ #include <vector>
7
11
 
8
12
  // fasttext
9
13
  #include <args.h>
10
14
  #include <autotune.h>
11
15
  #include <densematrix.h>
16
+ #include <dictionary.h>
12
17
  #include <fasttext.h>
18
+ #include <meter.h>
13
19
  #include <real.h>
14
20
  #include <vector.h>
15
21
 
@@ -20,41 +26,35 @@
20
26
  using fasttext::Args;
21
27
  using fasttext::FastText;
22
28
 
23
- using Rice::Array;
24
- using Rice::Constructor;
25
- using Rice::Module;
26
- using Rice::define_class_under;
27
- using Rice::define_module;
28
- using Rice::define_module_under;
29
-
30
- namespace Rice::detail
31
- {
29
+ namespace Rice::detail {
32
30
  template<>
33
- class To_Ruby<std::vector<std::pair<fasttext::real, std::string>>>
34
- {
31
+ class To_Ruby<std::vector<std::pair<fasttext::real, std::string>>> {
35
32
  public:
36
- VALUE convert(std::vector<std::pair<fasttext::real, std::string>> const & x)
37
- {
38
- Array ret;
33
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
34
+
35
+ VALUE convert(const std::vector<std::pair<fasttext::real, std::string>>& x) {
36
+ VALUE ret = detail::protect(rb_ary_new2, x.size());
39
37
  for (const auto& v : x) {
40
- Array a;
41
- a.push(v.first);
42
- a.push(v.second);
43
- ret.push(a);
38
+ VALUE p1 = To_Ruby<fasttext::real>().convert(v.first);
39
+ VALUE p2 = To_Ruby<std::string>().convert(v.second);
40
+ VALUE a = detail::protect(rb_ary_new3, 2, p1, p2);
41
+ detail::protect(rb_ary_push, ret, a);
44
42
  }
45
43
  return ret;
46
44
  }
45
+
46
+ private:
47
+ Arg* arg_ = nullptr;
47
48
  };
48
- }
49
+ } // namespace Rice::detail
49
50
 
50
51
  extern "C"
51
- void Init_ext()
52
- {
53
- Module rb_mFastText = define_module("FastText");
54
- Module rb_mExt = define_module_under(rb_mFastText, "Ext");
52
+ void Init_ext() {
53
+ Rice::Module rb_mFastText = Rice::define_module("FastText");
54
+ Rice::Module rb_mExt = Rice::define_module_under(rb_mFastText, "Ext");
55
55
 
56
- define_class_under<Args>(rb_mExt, "Args")
57
- .define_constructor(Constructor<Args>())
56
+ Rice::define_class_under<Args>(rb_mExt, "Args")
57
+ .define_constructor(Rice::Constructor<Args>())
58
58
  .define_attr("input", &Args::input)
59
59
  .define_attr("output", &Args::output)
60
60
  .define_attr("lr", &Args::lr)
@@ -110,24 +110,24 @@ void Init_ext()
110
110
  .define_attr("autotune_duration", &Args::autotuneDuration)
111
111
  .define_attr("autotune_model_size", &Args::autotuneModelSize);
112
112
 
113
- define_class_under<FastText>(rb_mExt, "Model")
114
- .define_constructor(Constructor<FastText>())
113
+ Rice::define_class_under<FastText>(rb_mExt, "Model")
114
+ .define_constructor(Rice::Constructor<FastText>())
115
115
  .define_method(
116
116
  "words",
117
117
  [](FastText& m) {
118
118
  std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
119
119
  std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::word);
120
120
 
121
- Array vocab_list;
122
- Array vocab_freq;
121
+ Rice::Array vocab_list;
122
+ Rice::Array vocab_freq;
123
123
  for (int32_t i = 0; i < d->nwords(); i++) {
124
- vocab_list.push(d->getWord(i));
125
- vocab_freq.push(freq[i]);
124
+ vocab_list.push(d->getWord(i), false);
125
+ vocab_freq.push(freq.at(i), false);
126
126
  }
127
127
 
128
- Array ret;
129
- ret.push(vocab_list);
130
- ret.push(vocab_freq);
128
+ Rice::Array ret;
129
+ ret.push(vocab_list, false);
130
+ ret.push(vocab_freq, false);
131
131
  return ret;
132
132
  })
133
133
  .define_method(
@@ -136,22 +136,22 @@ void Init_ext()
136
136
  std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
137
137
  std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::label);
138
138
 
139
- Array vocab_list;
140
- Array vocab_freq;
139
+ Rice::Array vocab_list;
140
+ Rice::Array vocab_freq;
141
141
  for (int32_t i = 0; i < d->nlabels(); i++) {
142
- vocab_list.push(d->getLabel(i));
143
- vocab_freq.push(freq[i]);
142
+ vocab_list.push(d->getLabel(i), false);
143
+ vocab_freq.push(freq.at(i), false);
144
144
  }
145
145
 
146
- Array ret;
147
- ret.push(vocab_list);
148
- ret.push(vocab_freq);
146
+ Rice::Array ret;
147
+ ret.push(vocab_list, false);
148
+ ret.push(vocab_freq, false);
149
149
  return ret;
150
150
  })
151
151
  .define_method(
152
152
  "test",
153
153
  [](FastText& m, const std::string& filename, int32_t k) {
154
- std::ifstream ifs(filename);
154
+ std::ifstream ifs{filename};
155
155
  if (!ifs.is_open()) {
156
156
  throw std::invalid_argument("Test file cannot be opened!");
157
157
  }
@@ -159,21 +159,21 @@ void Init_ext()
159
159
  m.test(ifs, k, 0.0, meter);
160
160
  ifs.close();
161
161
 
162
- Array ret;
163
- ret.push(meter.nexamples());
164
- ret.push(meter.precision());
165
- ret.push(meter.recall());
162
+ Rice::Array ret;
163
+ ret.push(meter.nexamples(), false);
164
+ ret.push(meter.precision(), false);
165
+ ret.push(meter.recall(), false);
166
166
  return ret;
167
167
  })
168
168
  .define_method(
169
169
  "load_model",
170
- [](FastText& m, const std::string& s) {
171
- m.loadModel(s);
170
+ [](FastText& m, const std::string& filename) {
171
+ m.loadModel(filename);
172
172
  })
173
173
  .define_method(
174
174
  "save_model",
175
- [](FastText& m, const std::string& s) {
176
- m.saveModel(s);
175
+ [](FastText& m, const std::string& filename) {
176
+ m.saveModel(filename);
177
177
  })
178
178
  .define_method("dimension", &FastText::getDimension)
179
179
  .define_method("quantized?", &FastText::isQuant)
@@ -182,7 +182,7 @@ void Init_ext()
182
182
  .define_method(
183
183
  "predict",
184
184
  [](FastText& m, const std::string& text, int32_t k, float threshold) {
185
- std::stringstream ioss(text);
185
+ std::stringstream ioss{text};
186
186
  std::vector<std::pair<fasttext::real, std::string>> predictions;
187
187
  m.predictLine(ioss, predictions, k, threshold);
188
188
  return predictions;
@@ -197,12 +197,13 @@ void Init_ext()
197
197
  .define_method(
198
198
  "word_vector",
199
199
  [](FastText& m, const std::string& word) {
200
- auto dimension = m.getDimension();
201
- fasttext::Vector vec = fasttext::Vector(dimension);
200
+ int dimension = m.getDimension();
201
+ fasttext::Vector vec{dimension};
202
202
  m.getWordVector(vec, word);
203
- Array ret;
204
- for (size_t i = 0; i < vec.size(); i++) {
205
- ret.push(vec[i]);
203
+ Rice::Array ret;
204
+ // fasttext::Vector uses int64_t for size and indexing
205
+ for (int64_t i = 0; i < vec.size(); i++) {
206
+ ret.push(vec[i], false);
206
207
  }
207
208
  return ret;
208
209
  })
@@ -214,22 +215,23 @@ void Init_ext()
214
215
  std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
215
216
  d->getSubwords(word, ngrams, subwords);
216
217
 
217
- Array ret;
218
+ Rice::Array ret;
218
219
  for (const auto& subword : subwords) {
219
- ret.push(subword);
220
+ ret.push(subword, false);
220
221
  }
221
222
  return ret;
222
223
  })
223
224
  .define_method(
224
225
  "sentence_vector",
225
226
  [](FastText& m, const std::string& text) {
226
- std::istringstream in(text);
227
- auto dimension = m.getDimension();
228
- fasttext::Vector vec = fasttext::Vector(dimension);
227
+ std::istringstream in{text};
228
+ int dimension = m.getDimension();
229
+ fasttext::Vector vec{dimension};
229
230
  m.getSentenceVector(in, vec);
230
- Array ret;
231
- for (size_t i = 0; i < vec.size(); i++) {
232
- ret.push(vec[i]);
231
+ Rice::Array ret;
232
+ // fasttext::Vector uses int64_t for size and indexing
233
+ for (int64_t i = 0; i < vec.size(); i++) {
234
+ ret.push(vec[i], false);
233
235
  }
234
236
  return ret;
235
237
  })
@@ -237,7 +239,7 @@ void Init_ext()
237
239
  "train",
238
240
  [](FastText& m, Args& a) {
239
241
  if (a.hasAutotune()) {
240
- fasttext::Autotune autotune(std::shared_ptr<fasttext::FastText>(&m, [](fasttext::FastText*) {}));
242
+ fasttext::Autotune autotune{std::shared_ptr<fasttext::FastText>(&m, [](fasttext::FastText*) {})};
241
243
  autotune.train(a);
242
244
  } else {
243
245
  m.train(a);
data/lib/fasttext/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module FastText
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/fasttext.rb CHANGED
@@ -23,7 +23,7 @@ module FastText
23
23
  else
24
24
  FastText::Vectorizer.new
25
25
  end
26
- model.instance_variable_set("@m", m)
26
+ model.instance_variable_set(:@m, m)
27
27
  model
28
28
  end
29
29
 
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fasttext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-10-22 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rice
@@ -16,15 +15,14 @@ dependencies:
16
15
  requirements:
17
16
  - - ">="
18
17
  - !ruby/object:Gem::Version
19
- version: 4.3.3
18
+ version: '4.7'
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
- version: 4.3.3
27
- description:
25
+ version: '4.7'
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions:
@@ -75,7 +73,6 @@ homepage: https://github.com/ankane/fastText-ruby
75
73
  licenses:
76
74
  - MIT
77
75
  metadata: {}
78
- post_install_message:
79
76
  rdoc_options: []
80
77
  require_paths:
81
78
  - lib
@@ -83,15 +80,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
83
80
  requirements:
84
81
  - - ">="
85
82
  - !ruby/object:Gem::Version
86
- version: '3.1'
83
+ version: '3.3'
87
84
  required_rubygems_version: !ruby/object:Gem::Requirement
88
85
  requirements:
89
86
  - - ">="
90
87
  - !ruby/object:Gem::Version
91
88
  version: '0'
92
89
  requirements: []
93
- rubygems_version: 3.5.16
94
- signing_key:
90
+ rubygems_version: 4.0.6
95
91
  specification_version: 4
96
92
  summary: Efficient text classification and representation learning for Ruby
97
93
  test_files: []