RubyGems - fasttext - Versions diffs - 0.1.2 → 0.1.3 - Mend

fasttext 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +18 -8
data/ext/fasttext/ext.cpp +66 -35
data/ext/fasttext/extconf.rb +2 -3
data/lib/fasttext/classifier.rb +13 -3
data/lib/fasttext/vectorizer.rb +6 -1
data/lib/fasttext/version.rb +1 -1
data/vendor/fastText/README.md +3 -3
data/vendor/fastText/src/args.cc +179 -6
data/vendor/fastText/src/args.h +29 -1
data/vendor/fastText/src/autotune.cc +477 -0
data/vendor/fastText/src/autotune.h +89 -0
data/vendor/fastText/src/densematrix.cc +27 -7
data/vendor/fastText/src/densematrix.h +10 -2
data/vendor/fastText/src/fasttext.cc +125 -114
data/vendor/fastText/src/fasttext.h +31 -52
data/vendor/fastText/src/main.cc +32 -13
data/vendor/fastText/src/meter.cc +148 -2
data/vendor/fastText/src/meter.h +24 -2
data/vendor/fastText/src/model.cc +0 -1
data/vendor/fastText/src/real.h +0 -1
data/vendor/fastText/src/utils.cc +25 -0
data/vendor/fastText/src/utils.h +29 -0
data/vendor/fastText/src/vector.cc +0 -1
metadata +5 -4
data/lib/fasttext/ext.bundle +0 -0

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f83be8c01c6a45a90758ccee430b3898396bcfdda5a2c338126ed9dc3620aea5
-  data.tar.gz: 7e3dee8eb3afe12745f78448fd01ac68a2f0ac946bd01195f2f6e6081f62fbad
+  metadata.gz: fb6649e0a3992c6e12572d672e8ba768220662efc37b982c278a8d0713716029
+  data.tar.gz: 12a0441cf1030bfbfe99d26824fe757d5b83c2f47de899fecf13a73aa657bd76
 SHA512:
-  metadata.gz: be3117e1aceed3f6126fc1d84eb87caf53abb3be802a419ebcaa284cb567ee9ac033d5860842690bbd1c0477a6e5013dfc36eb96f6fb067a63015150fa18a1fe
-  data.tar.gz: dc2467f3f7317b5e1955ede144d9ad50c5abb1cc2b9dc5ad356350f192631b0142c91aecc4dd03b331d90f1cf65c7f3ba3ec176c17dc6b66a1226171261de1b6
+  metadata.gz: dadcbc9a0d39468b4d1070cbf74abece85e687fe2d5de9dfb03c8e79f630b47a0ce37bf73f1383bccf917190bf4f1a2acb12e4741b4aaf45d0c688991baf7893
+  data.tar.gz: b9615f2edf557b7a2bb9c90457581489dd86be7539e7f4e9b75ee72cb3fe26ca9a216c5ecda4ff6e82ef348fbcc0482cd4e727e9c6652f2b38195ad3edba1752

data/CHANGELOG.md CHANGED

@@ -1,3 +1,9 @@
+## 0.1.3 (2020-04-28)
+- Updated fastText to 0.9.2
+- Added support for autotune
+- Added `--with-optflags` option
 ## 0.1.2 (2020-01-10)
 - Fixed installation error with Ruby 2.7

data/README.md CHANGED

@@ -2,7 +2,7 @@
 [fastText](https://fasttext.cc) - efficient text classification and representation learning - for Ruby
-[![Build Status](https://travis-ci.org/ankane/fasttext.svg?branch=master)](https://travis-ci.org/ankane/fasttext)
+[![Build Status](https://travis-ci.org/ankane/fasttext.svg?branch=master)](https://travis-ci.org/ankane/fasttext) [![Build status](https://ci.appveyor.com/api/projects/status/67yby3w6mth766y9/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/fasttext/branch/master)
 ## Installation
@@ -77,6 +77,12 @@ model.labels
 > Use `include_freq: true` to get their frequency
+Search for the best hyperparameters
+```ruby
+model.fit(x, y, autotune_set: [x_valid, y_valid])
+```
 Compress the model - significantly reduces size but sacrifices a little performance
 ```ruby
@@ -168,7 +174,11 @@ FastText::Classifier.new(
   t: 0.0001,                  # sampling threshold
   label_prefix: "__label__"   # label prefix
   verbose: 2,                 # verbose
-  pretrained_vectors: nil     # pretrained word vectors (.vec file)
+  pretrained_vectors: nil,    # pretrained word vectors (.vec file)
+  autotune_metric: "f1",      # autotune optimization metric
+  autotune_predictions: 1,    # autotune predictions
+  autotune_duration: 300,     # autotune search time in seconds
+  autotune_model_size: nil    # autotune model size, like 2M
 )
 ```
@@ -200,7 +210,7 @@ FastText::Vectorizer.new(
 Input can be read directly from files
 ```ruby
-model.fit("train.txt")
+model.fit("train.txt", autotune_set: "valid.txt")
 model.test("test.txt")
 ```
@@ -260,12 +270,12 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
 - Write, clarify, or fix documentation
 - Suggest or add new features
-To get started with development and testing:
+To get started with development:
 ```sh
-git clone https://github.com/ankane/fasttext.git
-cd fasttext
+git clone https://github.com/ankane/fastText.git
+cd fastText
 bundle install
-rake compile
-rake test
+bundle exec rake compile
+bundle exec rake test
 ```

data/ext/fasttext/ext.cpp CHANGED

@@ -1,18 +1,33 @@
+// stdlib
+#include <cmath>
+#include <iterator>
+#include <sstream>
+#include <stdexcept>
+// fasttext
 #include <args.h>
+#include <autotune.h>
 #include <densematrix.h>
 #include <fasttext.h>
-#include <rice/Data_Type.hpp>
-#include <rice/Constructor.hpp>
-#include <rice/Array.hpp>
-#include <rice/Hash.hpp>
 #include <real.h>
 #include <vector.h>
-#include <cmath>
-#include <iterator>
-#include <sstream>
-#include <stdexcept>
-using namespace Rice;
+// rice
+#include <rice/Array.hpp>
+#include <rice/Constructor.hpp>
+#include <rice/Data_Type.hpp>
+#include <rice/Hash.hpp>
+using fasttext::FastText;
+using Rice::Array;
+using Rice::Constructor;
+using Rice::Hash;
+using Rice::Module;
+using Rice::Object;
+using Rice::define_class_under;
+using Rice::define_module;
+using Rice::define_module_under;
 template<>
 inline
@@ -104,8 +119,18 @@ fasttext::Args buildArgs(Hash h) {
       a.pretrainedVectors = from_ruby<std::string>(value);
     } else if (name == "save_output") {
       a.saveOutput = from_ruby<bool>(value);
-    // } else if (name == "seed") {
-    //   a.seed = from_ruby<int>(value);
+    } else if (name == "seed") {
+      a.seed = from_ruby<int>(value);
+    } else if (name == "autotune_validation_file") {
+      a.autotuneValidationFile = from_ruby<std::string>(value);
+    } else if (name == "autotune_metric") {
+      a.autotuneMetric = from_ruby<std::string>(value);
+    } else if (name == "autotune_predictions") {
+      a.autotunePredictions = from_ruby<int>(value);
+    } else if (name == "autotune_duration") {
+      a.autotuneDuration = from_ruby<int>(value);
+    } else if (name == "autotune_model_size") {
+      a.autotuneModelSize = from_ruby<std::string>(value);
     } else {
       throw std::invalid_argument("Unknown argument: " + name);
     }
@@ -119,11 +144,11 @@ void Init_ext()
   Module rb_mFastText = define_module("FastText");
   Module rb_mExt = define_module_under(rb_mFastText, "Ext");
-  define_class_under<fasttext::FastText>(rb_mExt, "Model")
-    .define_constructor(Constructor<fasttext::FastText>())
+  define_class_under<FastText>(rb_mExt, "Model")
+    .define_constructor(Constructor<FastText>())
     .define_method(
       "words",
-      *[](fasttext::FastText& m) {
+      *[](FastText& m) {
         std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
         std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::word);
@@ -141,7 +166,7 @@ void Init_ext()
       })
     .define_method(
       "labels",
-      *[](fasttext::FastText& m) {
+      *[](FastText& m) {
         std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
         std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::label);
@@ -159,12 +184,12 @@ void Init_ext()
       })
     .define_method(
       "test",
-      *[](fasttext::FastText& m, const std::string filename, int32_t k) {
+      *[](FastText& m, const std::string filename, int32_t k) {
         std::ifstream ifs(filename);
         if (!ifs.is_open()) {
           throw std::invalid_argument("Test file cannot be opened!");
         }
-        fasttext::Meter meter;
+        fasttext::Meter meter(false);
         m.test(ifs, k, 0.0, meter);
         ifs.close();
@@ -176,17 +201,17 @@ void Init_ext()
       })
     .define_method(
       "load_model",
-      *[](fasttext::FastText& m, std::string s) { m.loadModel(s); })
+      *[](FastText& m, std::string s) { m.loadModel(s); })
     .define_method(
       "save_model",
-      *[](fasttext::FastText& m, std::string s) { m.saveModel(s); })
-    .define_method("dimension", &fasttext::FastText::getDimension)
-    .define_method("quantized?", &fasttext::FastText::isQuant)
-    .define_method("word_id", &fasttext::FastText::getWordId)
-    .define_method("subword_id", &fasttext::FastText::getSubwordId)
+      *[](FastText& m, std::string s) { m.saveModel(s); })
+    .define_method("dimension", &FastText::getDimension)
+    .define_method("quantized?", &FastText::isQuant)
+    .define_method("word_id", &FastText::getWordId)
+    .define_method("subword_id", &FastText::getSubwordId)
     .define_method(
       "predict",
-      *[](fasttext::FastText& m, const std::string text, int32_t k, float threshold) {
+      *[](FastText& m, const std::string text, int32_t k, float threshold) {
         std::stringstream ioss(text);
         std::vector<std::pair<fasttext::real, std::string>> predictions;
         m.predictLine(ioss, predictions, k, threshold);
@@ -194,14 +219,14 @@ void Init_ext()
       })
     .define_method(
       "nearest_neighbors",
-      *[](fasttext::FastText& m, const std::string& word, int32_t k) {
+      *[](FastText& m, const std::string& word, int32_t k) {
         return m.getNN(word, k);
       })
-    .define_method("analogies", &fasttext::FastText::getAnalogies)
-    .define_method("ngram_vectors", &fasttext::FastText::getNgramVectors)
+    .define_method("analogies", &FastText::getAnalogies)
+    .define_method("ngram_vectors", &FastText::getNgramVectors)
     .define_method(
       "word_vector",
-      *[](fasttext::FastText& m, const std::string word) {
+      *[](FastText& m, const std::string word) {
         int dimension = m.getDimension();
         fasttext::Vector vec = fasttext::Vector(dimension);
         m.getWordVector(vec, word);
@@ -214,7 +239,7 @@ void Init_ext()
       })
     .define_method(
       "subwords",
-      *[](fasttext::FastText& m, const std::string word) {
+      *[](FastText& m, const std::string word) {
         std::vector<std::string> subwords;
         std::vector<int32_t> ngrams;
         std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
@@ -228,7 +253,7 @@ void Init_ext()
       })
     .define_method(
       "sentence_vector",
-      *[](fasttext::FastText& m, const std::string text) {
+      *[](FastText& m, const std::string text) {
         std::istringstream in(text);
         int dimension = m.getDimension();
         fasttext::Vector vec = fasttext::Vector(dimension);
@@ -242,22 +267,28 @@ void Init_ext()
       })
     .define_method(
       "train",
-      *[](fasttext::FastText& m, Hash h) {
-        m.train(buildArgs(h));
+      *[](FastText& m, Hash h) {
+        auto a = buildArgs(h);
+        if (a.hasAutotune()) {
+          fasttext::Autotune autotune(std::shared_ptr<fasttext::FastText>(&m, [](fasttext::FastText*) {}));
+          autotune.train(a);
+        } else {
+          m.train(a);
+        }
       })
     .define_method(
       "quantize",
-      *[](fasttext::FastText& m, Hash h) {
+      *[](FastText& m, Hash h) {
         m.quantize(buildArgs(h));
       })
     .define_method(
       "supervised?",
-      *[](fasttext::FastText& m) {
+      *[](FastText& m) {
         return m.getArgs().model == fasttext::model_name::sup;
       })
     .define_method(
       "label_prefix",
-      *[](fasttext::FastText& m) {
+      *[](FastText& m) {
         return m.getArgs().label;
       });
 }

data/ext/fasttext/extconf.rb CHANGED

@@ -1,9 +1,8 @@
 require "mkmf-rice"
-abort "Missing stdc++" unless have_library("stdc++")
 # TODO use -std=c++14 when available
-$CXXFLAGS << " -pthread -std=c++11 -funroll-loops -O3 -march=native"
+# -pthread and -O3 set by default
+$CXXFLAGS << " -std=c++11 -funroll-loops " << with_config("optflags", "-march=native")
 ext = File.expand_path(".", __dir__)
 fasttext = File.expand_path("../../vendor/fastText/src", __dir__)

data/lib/fasttext/classifier.rb CHANGED

@@ -21,13 +21,23 @@ module FastText
       verbose: 2,
       pretrained_vectors: "",
       save_output: false,
-      # seed: 0
+      seed: 0,
+      autotune_validation_file: "",
+      autotune_metric: "f1",
+      autotune_predictions: 1,
+      autotune_duration: 60 * 5,
+      autotune_model_size: ""
     }
-    def fit(x, y = nil)
+    def fit(x, y = nil, autotune_set: nil)
       input = input_path(x, y)
       @m ||= Ext::Model.new
-      m.train(DEFAULT_OPTIONS.merge(@options).merge(input: input, model: "supervised"))
+      opts = DEFAULT_OPTIONS.merge(@options).merge(input: input, model: "supervised")
+      if autotune_set
+        x, y = autotune_set
+        opts.merge!(autotune_validation_file: input_path(x, y))
+      end
+      m.train(opts)
     end
     def predict(text, k: 1, threshold: 0.0)

data/lib/fasttext/vectorizer.rb CHANGED

@@ -20,7 +20,12 @@ module FastText
       verbose: 2,
       pretrained_vectors: "",
       save_output: false,
-      # seed: 0
+      seed: 0,
+      autotune_validation_file: "",
+      autotune_metric: "f1",
+      autotune_predictions: 1,
+      autotune_duration: 60 * 5,
+      autotune_model_size: ""
     }
     def fit(x)

data/lib/fasttext/version.rb CHANGED

@@ -1,3 +1,3 @@
 module FastText
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

data/vendor/fastText/README.md CHANGED

@@ -89,9 +89,9 @@ There is also the master branch that contains all of our most recent work, but c
 ### Building fastText using make (preferred)
 ```
-$ wget https://github.com/facebookresearch/fastText/archive/v0.9.1.zip
-$ unzip v0.9.1.zip
-$ cd fastText-0.9.1
+$ wget https://github.com/facebookresearch/fastText/archive/v0.9.2.zip
+$ unzip v0.9.2.zip
+$ cd fastText-0.9.2
 $ make
 ```

data/vendor/fastText/src/args.cc CHANGED

@@ -12,6 +12,8 @@
 #include <iostream>
 #include <stdexcept>
+#include <string>
+#include <unordered_map>
 namespace fasttext {
@@ -36,12 +38,19 @@ Args::Args() {
   verbose = 2;
   pretrainedVectors = "";
   saveOutput = false;
+  seed = 0;
   qout = false;
   retrain = false;
   qnorm = false;
   cutoff = 0;
   dsub = 2;
+  autotuneValidationFile = "";
+  autotuneMetric = "f1";
+  autotunePredictions = 1;
+  autotuneDuration = 60 * 5; // 5 minutes
+  autotuneModelSize = "";
 }
 std::string Args::lossToString(loss_name ln) const {
@@ -78,6 +87,24 @@ std::string Args::modelToString(model_name mn) const {
   return "Unknown model name!"; // should never happen
 }
+std::string Args::metricToString(metric_name mn) const {
+  switch (mn) {
+    case metric_name::f1score:
+      return "f1score";
+    case metric_name::f1scoreLabel:
+      return "f1scoreLabel";
+    case metric_name::precisionAtRecall:
+      return "precisionAtRecall";
+    case metric_name::precisionAtRecallLabel:
+      return "precisionAtRecallLabel";
+    case metric_name::recallAtPrecision:
+      return "recallAtPrecision";
+    case metric_name::recallAtPrecisionLabel:
+      return "recallAtPrecisionLabel";
+  }
+  return "Unknown metric name!"; // should never happen
+}
 void Args::parseArgs(const std::vector<std::string>& args) {
   std::string command(args[1]);
   if (command == "supervised") {
@@ -97,6 +124,8 @@ void Args::parseArgs(const std::vector<std::string>& args) {
       exit(EXIT_FAILURE);
     }
     try {
+      setManual(args[ai].substr(1));
       if (args[ai] == "-h") {
         std::cerr << "Here is the help! Usage:" << std::endl;
         printHelp();
@@ -157,6 +186,8 @@ void Args::parseArgs(const std::vector<std::string>& args) {
       } else if (args[ai] == "-saveOutput") {
         saveOutput = true;
         ai--;
+      } else if (args[ai] == "-seed") {
+        seed = std::stoi(args.at(ai + 1));
       } else if (args[ai] == "-qnorm") {
         qnorm = true;
         ai--;
@@ -170,6 +201,18 @@ void Args::parseArgs(const std::vector<std::string>& args) {
         cutoff = std::stoi(args.at(ai + 1));
       } else if (args[ai] == "-dsub") {
         dsub = std::stoi(args.at(ai + 1));
+      } else if (args[ai] == "-autotune-validation") {
+        autotuneValidationFile = std::string(args.at(ai + 1));
+      } else if (args[ai] == "-autotune-metric") {
+        autotuneMetric = std::string(args.at(ai + 1));
+        getAutotuneMetric(); // throws exception if not able to parse
+        getAutotuneMetricLabel(); // throws exception if not able to parse
+      } else if (args[ai] == "-autotune-predictions") {
+        autotunePredictions = std::stoi(args.at(ai + 1));
+      } else if (args[ai] == "-autotune-duration") {
+        autotuneDuration = std::stoi(args.at(ai + 1));
+      } else if (args[ai] == "-autotune-modelsize") {
+        autotuneModelSize = std::string(args.at(ai + 1));
       } else {
         std::cerr << "Unknown argument: " << args[ai] << std::endl;
         printHelp();
@@ -186,7 +229,7 @@ void Args::parseArgs(const std::vector<std::string>& args) {
     printHelp();
     exit(EXIT_FAILURE);
   }
-  if (wordNgrams <= 1 && maxn == 0) {
+  if (wordNgrams <= 1 && maxn == 0 && !hasAutotune()) {
     bucket = 0;
   }
 }
@@ -195,6 +238,7 @@ void Args::printHelp() {
   printBasicHelp();
   printDictionaryHelp();
   printTrainingHelp();
+  printAutotuneHelp();
   printQuantizationHelp();
 }
@@ -227,7 +271,8 @@ void Args::printTrainingHelp() {
   std::cerr
       << "\nThe following arguments for training are optional:\n"
       << "  -lr                 learning rate [" << lr << "]\n"
-      << "  -lrUpdateRate       change the rate of updates for the learning rate ["
+      << "  -lrUpdateRate       change the rate of updates for the learning "
+         "rate ["
       << lrUpdateRate << "]\n"
       << "  -dim                size of word vectors [" << dim << "]\n"
       << "  -ws                 size of the context window [" << ws << "]\n"
@@ -235,11 +280,31 @@ void Args::printTrainingHelp() {
       << "  -neg                number of negatives sampled [" << neg << "]\n"
       << "  -loss               loss function {ns, hs, softmax, one-vs-all} ["
       << lossToString(loss) << "]\n"
-      << "  -thread             number of threads [" << thread << "]\n"
-      << "  -pretrainedVectors  pretrained word vectors for supervised learning ["
+      << "  -thread             number of threads (set to 1 to ensure "
+         "reproducible results) ["
+      << thread << "]\n"
+      << "  -pretrainedVectors  pretrained word vectors for supervised "
+         "learning ["
       << pretrainedVectors << "]\n"
       << "  -saveOutput         whether output params should be saved ["
-      << boolToString(saveOutput) << "]\n";
+      << boolToString(saveOutput) << "]\n"
+      << "  -seed               random generator seed  [" << seed << "]\n";
+}
+void Args::printAutotuneHelp() {
+  std::cerr << "\nThe following arguments are for autotune:\n"
+            << "  -autotune-validation            validation file to be used "
+               "for evaluation\n"
+            << "  -autotune-metric                metric objective {f1, "
+               "f1:labelname} ["
+            << autotuneMetric << "]\n"
+            << "  -autotune-predictions           number of predictions used "
+               "for evaluation  ["
+            << autotunePredictions << "]\n"
+            << "  -autotune-duration              maximum duration in seconds ["
+            << autotuneDuration << "]\n"
+            << "  -autotune-modelsize             constraint model file size ["
+            << autotuneModelSize << "] (empty = do not quantize)\n";
 }
 void Args::printQuantizationHelp() {
@@ -247,7 +312,8 @@ void Args::printQuantizationHelp() {
       << "\nThe following arguments for quantization are optional:\n"
       << "  -cutoff             number of words and ngrams to retain ["
       << cutoff << "]\n"
-      << "  -retrain            whether embeddings are finetuned if a cutoff is applied ["
+      << "  -retrain            whether embeddings are finetuned if a cutoff "
+         "is applied ["
       << boolToString(retrain) << "]\n"
       << "  -qnorm              whether the norm is quantized separately ["
       << boolToString(qnorm) << "]\n"
@@ -317,4 +383,111 @@ void Args::dump(std::ostream& out) const {
       << " " << t << std::endl;
 }
+bool Args::hasAutotune() const {
+  return !autotuneValidationFile.empty();
+}
+bool Args::isManual(const std::string& argName) const {
+  return (manualArgs_.count(argName) != 0);
+}
+void Args::setManual(const std::string& argName) {
+  manualArgs_.emplace(argName);
+}
+metric_name Args::getAutotuneMetric() const {
+  if (autotuneMetric.substr(0, 3) == "f1:") {
+    return metric_name::f1scoreLabel;
+  } else if (autotuneMetric == "f1") {
+    return metric_name::f1score;
+  } else if (autotuneMetric.substr(0, 18) == "precisionAtRecall:") {
+    size_t semicolon = autotuneMetric.find(":", 18);
+    if (semicolon != std::string::npos) {
+      return metric_name::precisionAtRecallLabel;
+    }
+    return metric_name::precisionAtRecall;
+  } else if (autotuneMetric.substr(0, 18) == "recallAtPrecision:") {
+    size_t semicolon = autotuneMetric.find(":", 18);
+    if (semicolon != std::string::npos) {
+      return metric_name::recallAtPrecisionLabel;
+    }
+    return metric_name::recallAtPrecision;
+  }
+  throw std::runtime_error("Unknown metric : " + autotuneMetric);
+}
+std::string Args::getAutotuneMetricLabel() const {
+  metric_name metric = getAutotuneMetric();
+  std::string label;
+  if (metric == metric_name::f1scoreLabel) {
+    label = autotuneMetric.substr(3);
+  } else if (
+      metric == metric_name::precisionAtRecallLabel ||
+      metric == metric_name::recallAtPrecisionLabel) {
+    size_t semicolon = autotuneMetric.find(":", 18);
+    label = autotuneMetric.substr(semicolon + 1);
+  } else {
+    return label;
+  }
+  if (label.empty()) {
+    throw std::runtime_error("Empty metric label : " + autotuneMetric);
+  }
+  return label;
+}
+double Args::getAutotuneMetricValue() const {
+  metric_name metric = getAutotuneMetric();
+  double value = 0.0;
+  if (metric == metric_name::precisionAtRecallLabel ||
+      metric == metric_name::precisionAtRecall ||
+      metric == metric_name::recallAtPrecisionLabel ||
+      metric == metric_name::recallAtPrecision) {
+    size_t firstSemicolon = 18; // semicolon position in "precisionAtRecall:"
+    size_t secondSemicolon = autotuneMetric.find(":", firstSemicolon);
+    const std::string valueStr =
+        autotuneMetric.substr(firstSemicolon, secondSemicolon - firstSemicolon);
+    value = std::stof(valueStr) / 100.0;
+  }
+  return value;
+}
+int64_t Args::getAutotuneModelSize() const {
+  std::string modelSize = autotuneModelSize;
+  if (modelSize.empty()) {
+    return Args::kUnlimitedModelSize;
+  }
+  std::unordered_map<char, int> units = {
+      {'k', 1000},
+      {'K', 1000},
+      {'m', 1000000},
+      {'M', 1000000},
+      {'g', 1000000000},
+      {'G', 1000000000},
+  };
+  uint64_t multiplier = 1;
+  char lastCharacter = modelSize.back();
+  if (units.count(lastCharacter)) {
+    multiplier = units[lastCharacter];
+    modelSize = modelSize.substr(0, modelSize.size() - 1);
+  }
+  uint64_t size = 0;
+  size_t nonNumericCharacter = 0;
+  bool parseError = false;
+  try {
+    size = std::stol(modelSize, &nonNumericCharacter);
+  } catch (std::invalid_argument&) {
+    parseError = true;
+  }
+  if (!parseError && nonNumericCharacter != modelSize.size()) {
+    parseError = true;
+  }
+  if (parseError) {
+    throw std::invalid_argument(
+        "Unable to parse model size " + autotuneModelSize);
+  }
+  return size * multiplier;
+}
 } // namespace fasttext