fasttext 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +18 -8
- data/ext/fasttext/ext.cpp +66 -35
- data/ext/fasttext/extconf.rb +2 -3
- data/lib/fasttext/classifier.rb +13 -3
- data/lib/fasttext/vectorizer.rb +6 -1
- data/lib/fasttext/version.rb +1 -1
- data/vendor/fastText/README.md +3 -3
- data/vendor/fastText/src/args.cc +179 -6
- data/vendor/fastText/src/args.h +29 -1
- data/vendor/fastText/src/autotune.cc +477 -0
- data/vendor/fastText/src/autotune.h +89 -0
- data/vendor/fastText/src/densematrix.cc +27 -7
- data/vendor/fastText/src/densematrix.h +10 -2
- data/vendor/fastText/src/fasttext.cc +125 -114
- data/vendor/fastText/src/fasttext.h +31 -52
- data/vendor/fastText/src/main.cc +32 -13
- data/vendor/fastText/src/meter.cc +148 -2
- data/vendor/fastText/src/meter.h +24 -2
- data/vendor/fastText/src/model.cc +0 -1
- data/vendor/fastText/src/real.h +0 -1
- data/vendor/fastText/src/utils.cc +25 -0
- data/vendor/fastText/src/utils.h +29 -0
- data/vendor/fastText/src/vector.cc +0 -1
- metadata +5 -4
- data/lib/fasttext/ext.bundle +0 -0
data/vendor/fastText/src/autotune.h (new file)
@@ -0,0 +1,89 @@
+/**
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#include <istream>
+#include <memory>
+#include <random>
+#include <thread>
+#include <vector>
+
+#include "args.h"
+#include "fasttext.h"
+
+namespace fasttext {
+
+class AutotuneStrategy {
+ private:
+  Args bestArgs_;
+  int maxDuration_;
+  std::minstd_rand rng_;
+  int trials_;
+  int bestMinnIndex_;
+  int bestDsubExponent_;
+  int bestNonzeroBucket_;
+  int originalBucket_;
+  std::vector<int> minnChoices_;
+  int getIndex(int val, const std::vector<int>& choices);
+
+ public:
+  explicit AutotuneStrategy(
+      const Args& args,
+      std::minstd_rand::result_type seed);
+  Args ask(double elapsed);
+  void updateBest(const Args& args);
+};
+
+class Autotune {
+ protected:
+  std::shared_ptr<FastText> fastText_;
+  double elapsed_;
+  double bestScore_;
+  int32_t trials_;
+  int32_t sizeConstraintFailed_;
+  std::atomic<bool> continueTraining_;
+  std::unique_ptr<AutotuneStrategy> strategy_;
+  std::thread timer_;
+
+  bool keepTraining(double maxDuration) const;
+  void printInfo(double maxDuration);
+  void timer(
+      const std::chrono::steady_clock::time_point& start,
+      double maxDuration);
+  void abort();
+  void startTimer(const Args& args);
+  double getMetricScore(
+      Meter& meter,
+      const metric_name& metricName,
+      const double metricValue,
+      const std::string& metricLabel) const;
+  void printArgs(const Args& args, const Args& autotuneArgs);
+  void printSkippedArgs(const Args& autotuneArgs);
+  bool quantize(Args& args, const Args& autotuneArgs);
+  int getCutoffForFileSize(bool qout, bool qnorm, int dsub, int64_t fileSize)
+      const;
+
+  class TimeoutError : public std::runtime_error {
+   public:
+    TimeoutError() : std::runtime_error("Autotune timed out.") {}
+  };
+
+ public:
+  Autotune() = delete;
+  explicit Autotune(const std::shared_ptr<FastText>& fastText);
+  Autotune(const Autotune&) = delete;
+  Autotune(Autotune&&) = delete;
+  Autotune& operator=(const Autotune&) = delete;
+  Autotune& operator=(Autotune&&) = delete;
+  ~Autotune() noexcept = default;
+
+  void train(const Args& args);
+};
+
+} // namespace fasttext
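
The header above only declares the tuner. A minimal driver sketch, assuming an Args already configured for supervised training (the autotune-specific fields such as autotuneValidationFile live in args.h, whose changes are not shown in this section):

    #include "autotune.h"
    #include "fasttext.h"

    int main() {
      auto ft = std::make_shared<fasttext::FastText>();
      fasttext::Args args;                        // assumed: set up for supervised training
      args.input = "train.txt";                   // hypothetical path
      args.autotuneValidationFile = "valid.txt";  // assumed field added in args.h
      fasttext::Autotune autotune(ft);
      autotune.train(args);  // runs trials until the time budget, keeping the best args
      return 0;
    }
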
data/vendor/fastText/src/densematrix.cc
@@ -8,11 +8,10 @@
 
 #include "densematrix.h"
 
-#include <exception>
 #include <random>
 #include <stdexcept>
+#include <thread>
 #include <utility>
-
 #include "utils.h"
 #include "vector.h"
 
@@ -25,18 +24,39 @@ DenseMatrix::DenseMatrix(int64_t m, int64_t n) : Matrix(m, n), data_(m * n) {}
 DenseMatrix::DenseMatrix(DenseMatrix&& other) noexcept
     : Matrix(other.m_, other.n_), data_(std::move(other.data_)) {}
 
+DenseMatrix::DenseMatrix(int64_t m, int64_t n, real* dataPtr)
+    : Matrix(m, n), data_(dataPtr, dataPtr + (m * n)) {}
+
 void DenseMatrix::zero() {
   std::fill(data_.begin(), data_.end(), 0.0);
 }
 
-void DenseMatrix::uniform(real a) {
-  std::minstd_rand rng(1);
+void DenseMatrix::uniformThread(real a, int block, int32_t seed) {
+  std::minstd_rand rng(block + seed);
   std::uniform_real_distribution<> uniform(-a, a);
-  for (int64_t i = 0; i < (m_ * n_); i++) {
+  int64_t blockSize = (m_ * n_) / 10;
+  for (int64_t i = blockSize * block;
+       i < (m_ * n_) && i < blockSize * (block + 1);
+       i++) {
     data_[i] = uniform(rng);
   }
 }
 
+void DenseMatrix::uniform(real a, unsigned int thread, int32_t seed) {
+  if (thread > 1) {
+    std::vector<std::thread> threads;
+    for (int i = 0; i < thread; i++) {
+      threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
+    }
+    for (int32_t i = 0; i < threads.size(); i++) {
+      threads[i].join();
+    }
+  } else {
+    // webassembly can't instantiate `std::thread`
+    uniformThread(a, 0, seed);
+  }
+}
+
 void DenseMatrix::multiplyRow(const Vector& nums, int64_t ib, int64_t ie) {
   if (ie == -1) {
     ie = m_;
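
With the new signature, initialization is reproducible and parallel: the caller passes the thread count and a seed, and each block is filled by its own seeded std::minstd_rand. A quick sketch with illustrative values:

    fasttext::DenseMatrix m(100000, 100);  // rows = nwords + bucket, cols = dim
    m.uniform(1.0 / 100, 4, 42);           // a = 1/dim, 4 init threads, seed 42
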
@@ -73,7 +93,7 @@ real DenseMatrix::l2NormRow(int64_t i) const {
     norm += at(i, j) * at(i, j);
   }
   if (std::isnan(norm)) {
-    throw std::runtime_error("Encountered NaN.");
+    throw EncounteredNaNError();
   }
   return std::sqrt(norm);
 }
@@ -94,7 +114,7 @@ real DenseMatrix::dotRow(const Vector& vec, int64_t i) const {
     d += at(i, j) * vec[j];
   }
   if (std::isnan(d)) {
-    throw std::runtime_error("Encountered NaN.");
+    throw EncounteredNaNError();
   }
   return d;
 }
data/vendor/fastText/src/densematrix.h
@@ -8,12 +8,13 @@
 
 #pragma once
 
+#include <assert.h>
 #include <cstdint>
 #include <istream>
 #include <ostream>
+#include <stdexcept>
 #include <vector>
 
-#include <assert.h>
 #include "matrix.h"
 #include "real.h"
 
@@ -24,10 +25,12 @@ class Vector;
 class DenseMatrix : public Matrix {
  protected:
   std::vector<real> data_;
+  void uniformThread(real, int, int32_t);
 
  public:
   DenseMatrix();
   explicit DenseMatrix(int64_t, int64_t);
+  explicit DenseMatrix(int64_t m, int64_t n, real* dataPtr);
   DenseMatrix(const DenseMatrix&) = default;
   DenseMatrix(DenseMatrix&&) noexcept;
   DenseMatrix& operator=(const DenseMatrix&) = delete;
@@ -56,7 +59,7 @@ class DenseMatrix : public Matrix {
     return n_;
   }
   void zero();
-  void uniform(real);
+  void uniform(real, unsigned int, int32_t);
 
   void multiplyRow(const Vector& nums, int64_t ib = 0, int64_t ie = -1);
   void divideRow(const Vector& denoms, int64_t ib = 0, int64_t ie = -1);
@@ -71,5 +74,10 @@ class DenseMatrix : public Matrix {
   void save(std::ostream&) const override;
   void load(std::istream&) override;
   void dump(std::ostream&) const override;
+
+  class EncounteredNaNError : public std::runtime_error {
+   public:
+    EncounteredNaNError() : std::runtime_error("Encountered NaN.") {}
+  };
 };
 } // namespace fasttext
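
Because trainThread (later in this diff) stores this exception and startThreads rethrows it on the calling thread, divergence can now be caught by type; a minimal sketch, assuming ft and args are set up as elsewhere in this diff:

    try {
      ft.train(args);
    } catch (const fasttext::DenseMatrix::EncounteredNaNError& e) {
      std::cerr << "Training diverged: " << e.what() << '\n';  // "Encountered NaN."
    }
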
data/vendor/fastText/src/fasttext.cc
@@ -47,7 +47,8 @@ std::shared_ptr<Loss> FastText::createLoss(std::shared_ptr<Matrix>& output) {
   }
 }
 
-FastText::FastText() : quant_(false), wordVectors_(nullptr) {}
+FastText::FastText()
+    : quant_(false), wordVectors_(nullptr), trainException_(nullptr) {}
 
 void FastText::addInputVector(Vector& vec, int32_t ind) const {
   vec.addRow(*input_, ind);
@@ -69,6 +70,19 @@ std::shared_ptr<const DenseMatrix> FastText::getInputMatrix() const {
   return std::dynamic_pointer_cast<DenseMatrix>(input_);
 }
 
+void FastText::setMatrices(
+    const std::shared_ptr<DenseMatrix>& inputMatrix,
+    const std::shared_ptr<DenseMatrix>& outputMatrix) {
+  assert(input_->size(1) == output_->size(1));
+
+  input_ = std::dynamic_pointer_cast<Matrix>(inputMatrix);
+  output_ = std::dynamic_pointer_cast<Matrix>(outputMatrix);
+  wordVectors_.reset();
+  args_->dim = input_->size(1);
+
+  buildModel();
+}
+
 std::shared_ptr<const DenseMatrix> FastText::getOutputMatrix() const {
   if (quant_ && args_->qout) {
     throw std::runtime_error("Can't export quantized matrix");
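
setMatrices pairs naturally with the new DenseMatrix(int64_t, int64_t, real*) constructor: externally built buffers can be wrapped and swapped into a model. A sketch with hypothetical names (nrows, dim, nlabels, inputData, outputData are placeholders):

    auto in  = std::make_shared<fasttext::DenseMatrix>(nrows, dim, inputData);
    auto out = std::make_shared<fasttext::DenseMatrix>(nlabels, dim, outputData);
    ft.setMatrices(in, out);  // resets cached word vectors and rebuilds the model
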
@@ -86,6 +100,14 @@ int32_t FastText::getSubwordId(const std::string& subword) const {
   return dict_->nwords() + h;
 }
 
+int32_t FastText::getLabelId(const std::string& label) const {
+  int32_t labelId = dict_->getId(label);
+  if (labelId != -1) {
+    labelId -= dict_->nwords();
+  }
+  return labelId;
+}
+
 void FastText::getWordVector(Vector& vec, const std::string& word) const {
   const std::vector<int32_t>& ngrams = dict_->getSubwords(word);
   vec.zero();
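
getLabelId converts a dictionary id into an output-matrix row: the dictionary stores all words before labels, so a known label's id is offset by nwords(). For example:

    int32_t row = ft.getLabelId("__label__positive");  // output row, or -1 if unknown
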
@@ -97,10 +119,6 @@ void FastText::getWordVector(Vector& vec, const std::string& word) const {
   }
 }
 
-void FastText::getVector(Vector& vec, const std::string& word) const {
-  getWordVector(vec, word);
-}
-
 void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
   vec.zero();
   int32_t h = dict_->hash(subword) % args_->bucket;
@@ -109,6 +127,9 @@ void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
 }
 
 void FastText::saveVectors(const std::string& filename) {
+  if (!input_ || !output_) {
+    throw std::runtime_error("Model never trained");
+  }
   std::ofstream ofs(filename);
   if (!ofs.is_open()) {
     throw std::invalid_argument(
@@ -124,10 +145,6 @@ void FastText::saveVectors(const std::string& filename) {
   ofs.close();
 }
 
-void FastText::saveVectors() {
-  saveVectors(args_->output + ".vec");
-}
-
 void FastText::saveOutput(const std::string& filename) {
   std::ofstream ofs(filename);
   if (!ofs.is_open()) {
@@ -152,10 +169,6 @@ void FastText::saveOutput(const std::string& filename) {
   ofs.close();
 }
 
-void FastText::saveOutput() {
-  saveOutput(args_->output + ".output");
-}
-
 bool FastText::checkModel(std::istream& in) {
   int32_t magic;
   in.read((char*)&(magic), sizeof(int32_t));
@@ -176,21 +189,14 @@ void FastText::signModel(std::ostream& out) {
   out.write((char*)&(version), sizeof(int32_t));
 }
 
-void FastText::saveModel() {
-  std::string fn(args_->output);
-  if (quant_) {
-    fn += ".ftz";
-  } else {
-    fn += ".bin";
-  }
-  saveModel(fn);
-}
-
 void FastText::saveModel(const std::string& filename) {
   std::ofstream ofs(filename, std::ofstream::binary);
   if (!ofs.is_open()) {
     throw std::invalid_argument(filename + " cannot be opened for saving!");
   }
+  if (!input_ || !output_) {
+    throw std::runtime_error("Model never trained");
+  }
   signModel(ofs);
   args_->save(ofs);
   dict_->save(ofs);
@@ -224,6 +230,12 @@ std::vector<int64_t> FastText::getTargetCounts() const {
   }
 }
 
+void FastText::buildModel() {
+  auto loss = createLoss(output_);
+  bool normalizeGradient = (args_->model == model_name::sup);
+  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
+}
+
 void FastText::loadModel(std::istream& in) {
   args_ = std::make_shared<Args>();
   input_ = std::make_shared<DenseMatrix>();
@@ -256,37 +268,37 @@ void FastText::loadModel(std::istream& in) {
   }
   output_->load(in);
 
-  auto loss = createLoss(output_);
-  bool normalizeGradient = (args_->model == model_name::sup);
-  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
+  buildModel();
 }
 
-void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
-  std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
-  double t =
-      std::chrono::duration_cast<std::chrono::duration<double>>(end - start_)
-          .count();
+std::tuple<int64_t, double, double> FastText::progressInfo(real progress) {
+  double t = utils::getDuration(start_, std::chrono::steady_clock::now());
   double lr = args_->lr * (1.0 - progress);
   double wst = 0;
 
   int64_t eta = 2592000; // Default to one month in seconds (720 * 3600)
 
   if (progress > 0 && t >= 0) {
-    progress = progress * 100;
-    eta = t * (100 - progress) / progress;
+    eta = t * (1 - progress) / progress;
     wst = double(tokenCount_) / t / args_->thread;
   }
-  int32_t etah = eta / 3600;
-  int32_t etam = (eta % 3600) / 60;
+
+  return std::tuple<double, double, int64_t>(wst, lr, eta);
+}
+
+void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
+  double wst;
+  double lr;
+  int64_t eta;
+  std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
 
   log_stream << std::fixed;
   log_stream << "Progress: ";
-  log_stream << std::setprecision(1) << std::setw(5) << progress << "%";
+  log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
   log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
   log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
-  log_stream << " loss: " << std::setw(9) << std::setprecision(6) << loss;
-  log_stream << " ETA: " << std::setw(3) << etah;
-  log_stream << "h" << std::setw(2) << etam << "m";
+  log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
+  log_stream << " ETA: " << utils::ClockPrint(eta);
   log_stream << std::flush;
 }
 
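
The ETA arithmetic changes because progress now stays in [0, 1] for the whole function instead of being rescaled to a percentage in place; the two formulas agree, e.g.:

    // t = 60 s elapsed at a quarter done:
    // old: progress = 0.25 * 100 = 25;  eta = 60 * (100 - 25) / 25 = 180 s
    // new: eta = 60 * (1 - 0.25) / 0.25 = 180 s
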
@@ -299,13 +311,16 @@ std::vector<int32_t> FastText::selectEmbeddings(int32_t cutoff) const {
   std::iota(idx.begin(), idx.end(), 0);
   auto eosid = dict_->getId(Dictionary::EOS);
   std::sort(idx.begin(), idx.end(), [&norms, eosid](size_t i1, size_t i2) {
+    if (i1 == eosid && i2 == eosid) { // satisfy strict weak ordering
+      return false;
+    }
     return eosid == i1 || (eosid != i2 && norms[i1] > norms[i2]);
   });
   idx.erase(idx.begin() + cutoff, idx.end());
   return idx;
 }
 
-void FastText::quantize(const Args& qargs) {
+void FastText::quantize(const Args& qargs, const TrainCallback& callback) {
   if (args_->model != model_name::sup) {
     throw std::invalid_argument(
         "For now we only support quantization of supervised models");
@@ -337,10 +352,9 @@ void FastText::quantize(const Args& qargs) {
       args_->verbose = qargs.verbose;
       auto loss = createLoss(output_);
       model_ = std::make_shared<Model>(input, output, loss, normalizeGradient);
-      startThreads();
+      startThreads(callback);
     }
   }
-
   input_ = std::make_shared<QuantMatrix>(
       std::move(*(input.get())), qargs.dsub, qargs.qnorm);
 
@@ -348,7 +362,6 @@ void FastText::quantize(const Args& qargs) {
     output_ = std::make_shared<QuantMatrix>(
         std::move(*(output.get())), 2, qargs.qnorm);
   }
-
   quant_ = true;
   auto loss = createLoss(output_);
   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
@@ -408,7 +421,7 @@ void FastText::skipgram(
 
 std::tuple<int64_t, double, double>
 FastText::test(std::istream& in, int32_t k, real threshold) {
-  Meter meter;
+  Meter meter(false);
   test(in, k, threshold, meter);
 
   return std::tuple<int64_t, double, double>(
@@ -420,6 +433,9 @@ void FastText::test(std::istream& in, int32_t k, real threshold, Meter& meter)
   std::vector<int32_t> line;
   std::vector<int32_t> labels;
   Predictions predictions;
+  Model::State state(args_->dim, dict_->nlabels(), 0);
+  in.clear();
+  in.seekg(0, std::ios_base::beg);
 
   while (in.peek() != EOF) {
     line.clear();
@@ -521,16 +537,6 @@ std::vector<std::pair<std::string, Vector>> FastText::getNgramVectors(
   return result;
 }
 
-// deprecated. use getNgramVectors instead
-void FastText::ngramVectors(std::string word) {
-  std::vector<std::pair<std::string, Vector>> ngramVectors =
-      getNgramVectors(word);
-
-  for (const auto& ngramVector : ngramVectors) {
-    std::cout << ngramVector.first << " " << ngramVector.second << std::endl;
-  }
-}
-
 void FastText::precomputeWordVectors(DenseMatrix& wordVectors) {
   Vector vec(args_->dim);
   wordVectors.zero();
@@ -598,17 +604,6 @@ std::vector<std::pair<real, std::string>> FastText::getNN(
   return heap;
 }
 
-// depracted. use getNN instead
-void FastText::findNN(
-    const DenseMatrix& wordVectors,
-    const Vector& query,
-    int32_t k,
-    const std::set<std::string>& banSet,
-    std::vector<std::pair<real, std::string>>& results) {
-  results.clear();
-  results = getNN(wordVectors, query, k, banSet);
-}
-
 std::vector<std::pair<real, std::string>> FastText::getAnalogies(
     int32_t k,
     const std::string& wordA,
@@ -630,52 +625,52 @@ std::vector<std::pair<real, std::string>> FastText::getAnalogies(
   return getNN(*wordVectors_, query, k, {wordA, wordB, wordC});
 }
 
-// deprecated. use getAnalogies instead
-void FastText::analogies(int32_t k) {
-  std::string prompt("Query triplet (A - B + C)? ");
-  std::string wordA, wordB, wordC;
-  std::cout << prompt;
-  while (true) {
-    std::cin >> wordA;
-    std::cin >> wordB;
-    std::cin >> wordC;
-    auto results = getAnalogies(k, wordA, wordB, wordC);
-
-    for (auto& pair : results) {
-      std::cout << pair.second << " " << pair.first << std::endl;
-    }
-    std::cout << prompt;
-  }
+bool FastText::keepTraining(const int64_t ntokens) const {
+  return tokenCount_ < args_->epoch * ntokens && !trainException_;
 }
 
-void FastText::trainThread(int32_t threadId) {
+void FastText::trainThread(int32_t threadId, const TrainCallback& callback) {
  std::ifstream ifs(args_->input);
  utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
 
-  Model::State state(args_->dim, output_->size(0), threadId);
+  Model::State state(args_->dim, output_->size(0), threadId + args_->seed);
 
   const int64_t ntokens = dict_->ntokens();
   int64_t localTokenCount = 0;
   std::vector<int32_t> line, labels;
-  while (tokenCount_ < args_->epoch * ntokens) {
-    real progress = real(tokenCount_) / (args_->epoch * ntokens);
-    real lr = args_->lr * (1.0 - progress);
-    if (args_->model == model_name::sup) {
-      localTokenCount += dict_->getLine(ifs, line, labels);
-      supervised(state, lr, line, labels);
-    } else if (args_->model == model_name::cbow) {
-      localTokenCount += dict_->getLine(ifs, line, state.rng);
-      cbow(state, lr, line);
-    } else if (args_->model == model_name::sg) {
-      localTokenCount += dict_->getLine(ifs, line, state.rng);
-      skipgram(state, lr, line);
-    }
-    if (localTokenCount > args_->lrUpdateRate) {
-      tokenCount_ += localTokenCount;
-      localTokenCount = 0;
-      if (threadId == 0 && args_->verbose > 1)
-        loss_ = state.getLoss();
+  uint64_t callbackCounter = 0;
+  try {
+    while (keepTraining(ntokens)) {
+      real progress = real(tokenCount_) / (args_->epoch * ntokens);
+      if (callback && ((callbackCounter++ % 64) == 0)) {
+        double wst;
+        double lr;
+        int64_t eta;
+        std::tie<double, double, int64_t>(wst, lr, eta) =
+            progressInfo(progress);
+        callback(progress, loss_, wst, lr, eta);
+      }
+      real lr = args_->lr * (1.0 - progress);
+      if (args_->model == model_name::sup) {
+        localTokenCount += dict_->getLine(ifs, line, labels);
+        supervised(state, lr, line, labels);
+      } else if (args_->model == model_name::cbow) {
+        localTokenCount += dict_->getLine(ifs, line, state.rng);
+        cbow(state, lr, line);
+      } else if (args_->model == model_name::sg) {
+        localTokenCount += dict_->getLine(ifs, line, state.rng);
+        skipgram(state, lr, line);
+      }
+      if (localTokenCount > args_->lrUpdateRate) {
+        tokenCount_ += localTokenCount;
+        localTokenCount = 0;
+        if (threadId == 0 && args_->verbose > 1) {
+          loss_ = state.getLoss();
+        }
+      }
     }
+  } catch (DenseMatrix::EncounteredNaNError&) {
+    trainException_ = std::current_exception();
   }
   if (threadId == 0)
     loss_ = state.getLoss();
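
The TrainCallback type itself is declared in fasttext.h (not shown here), but the call site above implies the shape (progress, loss, words/sec/thread, lr, eta). A sketch of wiring one through the new train() overload:

    auto onProgress = [](float progress, float loss, double wst, double lr,
                         int64_t eta) {
      std::cerr << int(progress * 100) << "% avg.loss=" << loss
                << " eta=" << eta << "s\n";
    };
    ft.train(args, onProgress);  // invoked about once per 64 processed lines per worker
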
@@ -713,7 +708,7 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
   dict_->init();
   std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
       dict_->nwords() + args_->bucket, args_->dim);
-  input->uniform(1.0 / args_->dim);
+  input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
 
   for (size_t i = 0; i < n; i++) {
     int32_t idx = dict_->getId(words[i]);
@@ -727,14 +722,10 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
   return input;
 }
 
-void FastText::loadVectors(const std::string& filename) {
-  input_ = getInputMatrixFromFile(filename);
-}
-
 std::shared_ptr<Matrix> FastText::createRandomMatrix() const {
   std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
       dict_->nwords() + args_->bucket, args_->dim);
-  input->uniform(1.0 / args_->dim);
+  input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
 
   return input;
 }
@@ -749,7 +740,7 @@ std::shared_ptr<Matrix> FastText::createTrainOutputMatrix() const {
   return output;
 }
 
-void FastText::train(const Args& args) {
+void FastText::train(const Args& args, const TrainCallback& callback) {
   args_ = std::make_shared<Args>(args);
   dict_ = std::make_shared<Dictionary>(args_);
   if (args_->input == "-") {
@@ -770,23 +761,38 @@ void FastText::train(const Args& args) {
     input_ = createRandomMatrix();
   }
   output_ = createTrainOutputMatrix();
+  quant_ = false;
   auto loss = createLoss(output_);
   bool normalizeGradient = (args_->model == model_name::sup);
   model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
-  startThreads();
+  startThreads(callback);
+}
+
+void FastText::abort() {
+  try {
+    throw AbortError();
+  } catch (AbortError&) {
+    trainException_ = std::current_exception();
+  }
 }
 
-void FastText::startThreads() {
+void FastText::startThreads(const TrainCallback& callback) {
   start_ = std::chrono::steady_clock::now();
   tokenCount_ = 0;
   loss_ = -1;
+  trainException_ = nullptr;
   std::vector<std::thread> threads;
-  for (int32_t i = 0; i < args_->thread; i++) {
-    threads.push_back(std::thread([=]() { trainThread(i); }));
+  if (args_->thread > 1) {
+    for (int32_t i = 0; i < args_->thread; i++) {
+      threads.push_back(std::thread([=]() { trainThread(i, callback); }));
+    }
+  } else {
+    // webassembly can't instantiate `std::thread`
+    trainThread(0, callback);
   }
   const int64_t ntokens = dict_->ntokens();
   // Same condition as trainThread
-  while (tokenCount_ < args_->epoch * ntokens) {
+  while (keepTraining(ntokens)) {
     std::this_thread::sleep_for(std::chrono::milliseconds(100));
     if (loss_ >= 0 && args_->verbose > 1) {
       real progress = real(tokenCount_) / (args_->epoch * ntokens);
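
abort() works through the same exception channel as the NaN handling: the stored AbortError makes keepTraining() return false in every worker, and startThreads() rethrows it once the threads have joined. A sketch of cancelling from another thread, assuming AbortError (declared in fasttext.h, not shown here) derives from std::runtime_error:

    std::thread trainer([&]() {
      try {
        ft.train(args);
      } catch (const std::runtime_error&) {
        // AbortError lands here after the workers stop
      }
    });
    ft.abort();      // request cancellation once training is underway
    trainer.join();
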
@@ -794,9 +800,14 @@ void FastText::startThreads() {
       printInfo(progress, loss_, std::cerr);
     }
   }
-  for (int32_t i = 0; i < args_->thread; i++) {
+  for (int32_t i = 0; i < threads.size(); i++) {
     threads[i].join();
   }
+  if (trainException_) {
+    std::exception_ptr exception = trainException_;
+    trainException_ = nullptr;
+    std::rethrow_exception(exception);
+  }
   if (args_->verbose > 0) {
     std::cerr << "\r";
     printInfo(1.0, loss_, std::cerr);