RubyGems - tomoto - Versions diffs - 0.1.0 → 0.1.1 - Mend

tomoto 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a27c5c0ff4a71b0e0b084773adf7a2a0ede39152b210973787c12e98637cb7d3
-  data.tar.gz: c70cabfa9e8685e86edae56c4b50c52a8bb6baf4d3f7684c3e28d7345e460551
+  metadata.gz: 3b40c9adf2f0162eb6174b17395ea37b9294e14b22609e9f51951e9904125ff9
+  data.tar.gz: be3f68438f60a7e4fc11033921636f8d03bf411bd3d3eb6aa3b4fb448faac41a
 SHA512:
-  metadata.gz: d23a02abb149799facf1b557004ebc2d749131d37eb33e8f70e8aa109f117cc79a3db847326206fb08d5114b983959fa782082c9fe239e3573afe363d81f5066
-  data.tar.gz: 6ca2548b92c30adea217437dfad8a5e0ef802c2789a21cd5d40d88514607889b854afa6a2b7e1ef06f7ecabaa1226655c0dfc29aaa5439993861c2895542ae98
+  metadata.gz: a74747ae372d030c42562d4e2b99ab167ccc28533468ed08819f4bd34d42b340349870712c12e565388eb7833f993349432e77baee8618c34c265676ca181072
+  data.tar.gz: da9e833bb98726278108a68a7dd6bed0e54b3979c25d49d8db01aa613e6205a6a3512881688209baf2589c4dc5514ed2663fb251a5e273c42b678bb1daa06d74

data/CHANGELOG.md CHANGED

@@ -1,3 +1,7 @@
+## 0.1.1 (2020-10-10)
+- Added many more models
 ## 0.1.0 (2020-10-09)
 - First release

data/README.md CHANGED

@@ -1,6 +1,8 @@
-# Tomoto
+# tomoto
-[Tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
+:tomato: [tomoto](https://github.com/bab2min/tomotopy) - high performance topic modeling - for Ruby
+[![Build Status](https://travis-ci.org/ankane/tomoto.svg?branch=master)](https://travis-ci.org/ankane/tomoto)
 ## Installation
@@ -10,7 +12,7 @@ Add this line to your application’s Gemfile:
 gem 'tomoto'
 ```
-It can take around 10 minutes to compile the extension.
+It can take 10-20 minutes to compile the extension.
 ## Getting Started
@@ -65,23 +67,27 @@ model.ll_per_word
 Supports:
 - Latent Dirichlet Allocation (`LDA`)
+- Labeled LDA (`LLDA`)
+- Partially Labeled LDA (`PLDA`)
+- Supervised LDA (`SLDA`)
+- Dirichlet Multinomial Regression (`DMR`)
+- Generalized Dirichlet Multinomial Regression (`GDMR`)
 - Hierarchical Dirichlet Process (`HDP`)
+- Hierarchical LDA (`HLDA`)
+- Multi Grain LDA (`MGLDA`)
+- Pachinko Allocation (`PA`)
+- Hierarchical PA (`HPA`)
 - Correlated Topic Model (`CT`)
+- Dynamic Topic Model (`DT`)
-## Parameters
+## API
-```ruby
-Tomoto::LDA.new(
-  tw: :one, # or :idf, :pmi
-  min_cf: 0,
-  min_df: 0,
-  rm_top: 0,
-  k: 1,
-  alpha: 0.1,
-  eta: 0.01,
-  seed: nil
-)
-```
+This library follows the [tomotopy API](https://bab2min.github.io/tomotopy/v0.9.0/en/). There are a few changes to make it more Ruby-like:
+- The `get_` prefix has been removed from methods (`topic_words` instead of `get_topic_words`)
+- Methods that return booleans use `?` instead of `is_`  (`live_topic?` instead of `is_live_topic`)
+If a method or option you need isn’t supported, feel free to open an issue.
 ## Tokenization
@@ -93,7 +99,7 @@ model.add_doc(["tokens", "from", "document", "one"])
 ## Performance
-Tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
+tomoto uses AVX2, AVX, or SSE2 instructions to increase performance on machines that support it. Check what it’s using with:
 ```ruby
 Tomoto.isa

data/ext/tomoto/ext.cpp CHANGED

@@ -1,7 +1,21 @@
+// stdlib
+#include <fstream>
+#include <iostream>
 // tomoto
 #include <CT.h>
+#include <DMR.h>
+#include <DT.h>
+#include <GDMR.h>
 #include <HDP.h>
+#include <HLDA.h>
+#include <HPA.h>
 #include <LDA.h>
+#include <LLDA.h>
+#include <MGLDA.h>
+#include <PA.h>
+#include <PLDA.h>
+#include <SLDA.h>
 // rice
 #include <rice/Array.hpp>
@@ -26,6 +40,62 @@ Object to_ruby<std::vector<float>>(std::vector<float> const & x)
   return res;
 }
+template<>
+Object to_ruby<std::vector<uint32_t>>(std::vector<uint32_t> const & x)
+{
+  Array res;
+  for (auto const& v : x) {
+    res.push(v);
+  }
+  return res;
+}
+template<>
+Object to_ruby<std::vector<uint64_t>>(std::vector<uint64_t> const & x)
+{
+  Array res;
+  for (auto const& v : x) {
+    res.push(v);
+  }
+  return res;
+}
+template<>
+std::vector<std::string> from_ruby<std::vector<std::string>>(Object x)
+{
+  Array a = Array(x);
+  std::vector<std::string> res;
+  res.reserve(a.size());
+  for (auto const& v : a) {
+    res.push_back(from_ruby<std::string>(v));
+  }
+  return res;
+}
+template<>
+std::vector<float> from_ruby<std::vector<float>>(Object x)
+{
+  Array a = Array(x);
+  std::vector<float> res;
+  res.reserve(a.size());
+  for (auto const& v : a) {
+    res.push_back(from_ruby<float>(v));
+  }
+  return res;
+}
+template<>
+std::vector<uint64_t> from_ruby<std::vector<uint64_t>>(Object x)
+{
+  Array a = Array(x);
+  std::vector<uint64_t> res;
+  res.reserve(a.size());
+  for (auto const& v : a) {
+    res.push_back(from_ruby<uint64_t>(v));
+  }
+  return res;
+}
 extern "C"
 void Init_ext()
 {
@@ -55,12 +125,7 @@ void Init_ext()
       })
     .define_method(
       "_add_doc",
-      *[](tomoto::ILDAModel& self, Array rb_words) {
-        std::vector<std::string> words;
-        words.reserve(rb_words.size());
-        for (auto const& v : rb_words) {
-          words.push_back(from_ruby<std::string>(v));
-        }
+      *[](tomoto::ILDAModel& self, std::vector<std::string> words) {
         self.addDoc(words);
       })
     .define_method(
@@ -93,6 +158,11 @@ void Init_ext()
       *[](tomoto::ILDAModel& self) {
         return self.getEta();
       })
+    .define_method(
+      "global_step",
+      *[](tomoto::ILDAModel& self) {
+        return self.getGlobalStep();
+      })
     .define_method(
       "k",
       *[](tomoto::ILDAModel& self) {
@@ -112,15 +182,36 @@ void Init_ext()
         return self.getLLPerWord();
       })
     .define_method(
-      "num_words",
+      "num_docs",
       *[](tomoto::ILDAModel& self) {
-        return self.getN();
+        return self.getNumDocs();
       })
     .define_method(
       "num_vocabs",
       *[](tomoto::ILDAModel& self) {
         return self.getV();
       })
+    .define_method(
+      "num_words",
+      *[](tomoto::ILDAModel& self) {
+        return self.getN();
+      })
+    .define_method(
+      "optim_interval",
+      *[](tomoto::ILDAModel& self) {
+        return self.getOptimInterval();
+      })
+    .define_method(
+      "optim_interval=",
+      *[](tomoto::ILDAModel& self, size_t value) {
+        self.setOptimInterval(value);
+        return value;
+      })
+    .define_method(
+      "perplexity",
+      *[](tomoto::ILDAModel& self) {
+        return self.getPerplexity();
+      })
     .define_method(
       "_prepare",
       *[](tomoto::ILDAModel& self, size_t minCnt, size_t minDf, size_t rmTop) {
@@ -159,6 +250,62 @@ void Init_ext()
         size_t ps = 0;
         self.train(iteration, workers, (tomoto::ParallelScheme)ps);
       })
+    .define_method(
+      "_tw",
+      *[](tomoto::ILDAModel& self) {
+        return (int)self.getTermWeight();
+      })
+    .define_method(
+      "used_vocab_df",
+      *[](tomoto::ILDAModel& self) {
+        auto vocab = self.getVocabDf();
+        Array res;
+        for (size_t i = 0; i < self.getV(); i++) {
+          res.push(vocab[i]);
+        }
+        return res;
+      })
+    .define_method(
+      "used_vocab_freq",
+      *[](tomoto::ILDAModel& self) {
+        auto vocab = self.getVocabCf();
+        Array res;
+        for (size_t i = 0; i < self.getV(); i++) {
+          res.push(vocab[i]);
+        }
+        return res;
+      })
+    .define_method(
+      "used_vocabs",
+      *[](tomoto::ILDAModel& self) {
+        auto dict = self.getVocabDict();
+        Array res;
+        auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
+        for (size_t i = 0; i < self.getV(); i++) {
+          res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
+        }
+        return res;
+      })
+    .define_method(
+      "vocab_df",
+      *[](tomoto::ILDAModel& self) {
+        auto vocab = self.getVocabDf();
+        Array res;
+        for (size_t i = 0; i < vocab.size(); i++) {
+          res.push(vocab[i]);
+        }
+        return res;
+      })
+    .define_method(
+      "vocab_freq",
+      *[](tomoto::ILDAModel& self) {
+        auto vocab = self.getVocabCf();
+        Array res;
+        for (size_t i = 0; i < vocab.size(); i++) {
+          res.push(vocab[i]);
+        }
+        return res;
+      })
     .define_method(
       "vocabs",
       *[](tomoto::ILDAModel& self) {
@@ -180,6 +327,11 @@ void Init_ext()
         }
         return tomoto::ICTModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
       })
+    .define_method(
+      "_correlations",
+      *[](tomoto::ICTModel& self, tomoto::Tid topic_id) {
+        return self.getCorrelationTopic(topic_id);
+      })
     .define_method(
       "num_beta_sample",
       *[](tomoto::ICTModel& self) {
@@ -187,9 +339,9 @@ void Init_ext()
       })
     .define_method(
       "num_beta_sample=",
-      *[](tomoto::ICTModel& self, size_t numSample) {
-        self.setNumBetaSample(numSample);
-        return numSample;
+      *[](tomoto::ICTModel& self, size_t value) {
+        self.setNumBetaSample(value);
+        return value;
       })
     .define_method(
       "num_tmn_sample",
@@ -198,12 +350,12 @@ void Init_ext()
       })
     .define_method(
       "num_tmn_sample=",
-      *[](tomoto::ICTModel& self, size_t numSample) {
-        self.setNumTMNSample(numSample);
-        return numSample;
+      *[](tomoto::ICTModel& self, size_t value) {
+        self.setNumTMNSample(value);
+        return value;
       })
     .define_method(
-      "prior_cov",
+      "_prior_cov",
       *[](tomoto::ICTModel& self) {
         return self.getPriorCov();
       })
@@ -213,6 +365,138 @@ void Init_ext()
         return self.getPriorMean();
       });
+  Class rb_cDMR = define_class_under<tomoto::IDMRModel, tomoto::ILDAModel>(rb_mTomoto, "DMR")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k, float alpha, float sigma, float eta, float alpha_epsilon, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IDMRModel::create((tomoto::TermWeight)tw, k, alpha, sigma, eta, alpha_epsilon, seed);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::IDMRModel& self, std::vector<std::string> words, std::vector<std::string> metadata) {
+        self.addDoc(words, metadata);
+      })
+    .define_method(
+      "alpha_epsilon",
+      *[](tomoto::IDMRModel& self) {
+        return self.getAlphaEps();
+      })
+    .define_method(
+      "alpha_epsilon=",
+      *[](tomoto::IDMRModel& self, float value) {
+        self.setAlphaEps(value);
+        return value;
+      })
+    .define_method(
+      "f",
+      *[](tomoto::IDMRModel& self) {
+        return self.getF();
+      })
+    .define_method(
+      "_lambdas",
+      *[](tomoto::IDMRModel& self, tomoto::Tid topic_id) {
+        return self.getLambdaByTopic(topic_id);
+      })
+    .define_method(
+      "metadata_dict",
+      *[](tomoto::IDMRModel& self) {
+        auto dict = self.getMetadataDict();
+        Array res;
+        auto utf8 = Class(rb_cEncoding).call("const_get", "UTF_8");
+        for (size_t i = 0; i < dict.size(); i++) {
+          res.push(to_ruby<std::string>(dict.toWord(i)).call("force_encoding", utf8));
+        }
+        return res;
+      })
+    .define_method(
+      "sigma",
+      *[](tomoto::IDMRModel& self) {
+        return self.getSigma();
+      });
+  Class rb_cDT = define_class_under<tomoto::IDTModel, tomoto::ILDAModel>(rb_mTomoto, "DT")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k, size_t t, float alphaVar, float etaVar, float phiVar, float shapeA, float shapeB, float shapeC) {
+        // Rice only supports 10 arguments
+        int seed = -1;
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IDTModel::create((tomoto::TermWeight)tw, k, t, alphaVar, etaVar, phiVar, shapeA, shapeB, shapeC, 0, seed);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::IDTModel& self, std::vector<std::string> words, size_t timepoint) {
+        self.addDoc(words, timepoint);
+      })
+    .define_method(
+      "lr_a",
+      *[](tomoto::IDTModel& self) {
+        return self.getShapeA();
+      })
+    .define_method(
+      "lr_a=",
+      *[](tomoto::IDTModel& self, float value) {
+        self.setShapeA(value);
+        return value;
+      })
+    .define_method(
+      "lr_b",
+      *[](tomoto::IDTModel& self) {
+        return self.getShapeB();
+      })
+    .define_method(
+      "lr_b=",
+      *[](tomoto::IDTModel& self, float value) {
+        self.setShapeB(value);
+        return value;
+      })
+    .define_method(
+      "lr_c",
+      *[](tomoto::IDTModel& self) {
+        return self.getShapeC();
+      })
+    .define_method(
+      "lr_c=",
+      *[](tomoto::IDTModel& self, float value) {
+        self.setShapeC(value);
+        return value;
+      })
+    .define_method(
+      "num_docs_by_timepoint",
+      *[](tomoto::IDTModel& self) {
+        return self.getNumDocsByT();
+      })
+    .define_method(
+      "num_timepoints",
+      *[](tomoto::IDTModel& self) {
+        return self.getT();
+      });
+  Class rb_cGDMR = define_class_under<tomoto::IGDMRModel, tomoto::IDMRModel>(rb_mTomoto, "GDMR")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k, std::vector<uint64_t> degrees, float alpha, float sigma, float sigma0, float eta, float alpha_epsilon, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IGDMRModel::create((tomoto::TermWeight)tw, k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed);
+      })
+    .define_method(
+      "degrees",
+      *[](tomoto::IGDMRModel& self) {
+        return self.getFs();
+      })
+    .define_method(
+      "sigma0",
+      *[](tomoto::IGDMRModel& self) {
+        return self.getSigma0();
+      });
   Class rb_cHDP = define_class_under<tomoto::IHDPModel, tomoto::ILDAModel>(rb_mTomoto, "HDP")
     .define_singleton_method(
       "_new",
@@ -242,4 +526,217 @@ void Init_ext()
       *[](tomoto::IHDPModel& self) {
         return self.getTotalTables();
       });
+  Class rb_cHLDA = define_class_under<tomoto::IHLDAModel, tomoto::ILDAModel>(rb_mTomoto, "HLDA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t levelDepth, float alpha, float eta, float gamma, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IHLDAModel::create((tomoto::TermWeight)tw, levelDepth, alpha, eta, gamma, seed);
+      })
+    .define_method(
+      "_children_topics",
+      *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
+        return self.getChildTopicId(topic_id);
+      })
+    .define_method(
+      "depth",
+      *[](tomoto::IHLDAModel& self) {
+        return self.getLevelDepth();
+      })
+    .define_method(
+      "gamma",
+      *[](tomoto::IHLDAModel& self) {
+        return self.getGamma();
+      })
+    .define_method(
+      "_level",
+      *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
+        return self.getLevelOfTopic(topic_id);
+      })
+    .define_method(
+      "live_k",
+      *[](tomoto::IHLDAModel& self) {
+        return self.getLiveK();
+      })
+    .define_method(
+      "_live_topic?",
+      *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
+        return self.isLiveTopic(topic_id);
+      })
+    .define_method(
+      "_num_docs_of_topic",
+      *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
+        return self.getNumDocsOfTopic(topic_id);
+      })
+    .define_method(
+      "_parent_topic",
+      *[](tomoto::IHLDAModel& self, tomoto::Tid topic_id) {
+        return self.getParentTopicId(topic_id);
+      });
+  Class rb_cPA = define_class_under<tomoto::IPAModel, tomoto::ILDAModel>(rb_mTomoto, "PA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IPAModel::create((tomoto::TermWeight)tw, k1, k2, alpha, eta, seed);
+      })
+    .define_method(
+      "k1",
+      *[](tomoto::IPAModel& self) {
+        return self.getK();
+      })
+    .define_method(
+      "k2",
+      *[](tomoto::IPAModel& self) {
+        return self.getK2();
+      });
+  Class rb_cHPA = define_class_under<tomoto::IHPAModel, tomoto::IPAModel>(rb_mTomoto, "HPA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k1, size_t k2, float alpha, float eta, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IHPAModel::create((tomoto::TermWeight)tw, false, k1, k2, alpha, eta, seed);
+      });
+  Class rb_cMGLDA = define_class_under<tomoto::IMGLDAModel, tomoto::ILDAModel>(rb_mTomoto, "MGLDA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k_g, size_t k_l, size_t t, float alpha_g, float alpha_l, float alpha_mg, float alpha_ml, float eta_g) {
+        return tomoto::IMGLDAModel::create((tomoto::TermWeight)tw, k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::IMGLDAModel& self, std::vector<std::string> words, std::string delimiter) {
+        self.addDoc(words, delimiter);
+      })
+    .define_method(
+      "alpha_g",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getAlpha();
+      })
+    .define_method(
+      "alpha_l",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getAlphaL();
+      })
+    .define_method(
+      "alpha_mg",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getAlphaM();
+      })
+    .define_method(
+      "alpha_ml",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getAlphaML();
+      })
+    .define_method(
+      "eta_g",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getEta();
+      })
+    .define_method(
+      "eta_l",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getEtaL();
+      })
+    .define_method(
+      "gamma",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getGamma();
+      })
+    .define_method(
+      "k_g",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getK();
+      })
+    .define_method(
+      "k_l",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getKL();
+      })
+    .define_method(
+      "t",
+      *[](tomoto::IMGLDAModel& self) {
+        return self.getT();
+      });
+  Class rb_cLLDA = define_class_under<tomoto::ILLDAModel, tomoto::ILDAModel>(rb_mTomoto, "LLDA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k, float alpha, float eta, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::ILLDAModel::create((tomoto::TermWeight)tw, k, alpha, eta, seed);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::ILLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
+        self.addDoc(words, labels);
+      })
+    .define_method(
+      "topics_per_label",
+      *[](tomoto::ILLDAModel& self) {
+        return self.getNumTopicsPerLabel();
+      });
+  Class rb_cPLDA = define_class_under<tomoto::IPLDAModel, tomoto::ILLDAModel>(rb_mTomoto, "PLDA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t latent_topics, float alpha, float eta, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        return tomoto::IPLDAModel::create((tomoto::TermWeight)tw, latent_topics, 1, alpha, eta, seed);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::IPLDAModel& self, std::vector<std::string> words, std::vector<std::string> labels) {
+        self.addDoc(words, labels);
+      })
+    .define_method(
+      "latent_topics",
+      *[](tomoto::IPLDAModel& self) {
+        return self.getNumLatentTopics();
+      });
+  Class rb_cSLDA = define_class_under<tomoto::ISLDAModel, tomoto::ILDAModel>(rb_mTomoto, "SLDA")
+    .define_singleton_method(
+      "_new",
+      *[](size_t tw, size_t k, Array rb_vars, float alpha, float eta, std::vector<float> mu, std::vector<float> nu_sq, std::vector<float> glm_param, int seed) {
+        if (seed < 0) {
+          seed = std::random_device{}();
+        }
+        std::vector<tomoto::ISLDAModel::GLM> vars;
+        vars.reserve(rb_vars.size());
+        for (auto const& v : rb_vars) {
+          vars.push_back((tomoto::ISLDAModel::GLM) from_ruby<int>(v));
+        }
+        return tomoto::ISLDAModel::create((tomoto::TermWeight)tw, k, vars, alpha, eta, mu, nu_sq, glm_param, seed);
+      })
+    .define_method(
+      "_add_doc",
+      *[](tomoto::ISLDAModel& self, std::vector<std::string> words, std::vector<float> y) {
+        self.addDoc(words, y);
+      })
+    .define_method(
+      "f",
+      *[](tomoto::ISLDAModel& self) {
+        return self.getF();
+      })
+    .define_method(
+      "_var_type",
+      *[](tomoto::ISLDAModel& self, size_t var_id) {
+        if (var_id >= self.getF()) throw std::runtime_error{ "'var_id' must be < 'f'" };
+        return self.getTypeOfVar(var_id) == tomoto::ISLDAModel::GLM::linear ? "l" : "b";
+      });
 }

data/ext/tomoto/extconf.rb CHANGED

@@ -11,6 +11,9 @@ apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
 if apple_clang
   # silence rice warnings
   $CXXFLAGS += " -Wno-deprecated-declarations"
+else
+  # silence eigen warnings
+  $CXXFLAGS += " -Wno-ignored-attributes -Wno-deprecated-copy"
 end
 # silence tomoto warnings

data/lib/tomoto.rb CHANGED

@@ -3,8 +3,18 @@ require "tomoto/ext"
 # modules
 require "tomoto/ct"
+require "tomoto/dmr"
+require "tomoto/dt"
+require "tomoto/gdmr"
 require "tomoto/hdp"
+require "tomoto/hlda"
+require "tomoto/hpa"
 require "tomoto/lda"
+require "tomoto/llda"
+require "tomoto/mglda"
+require "tomoto/pa"
+require "tomoto/plda"
+require "tomoto/slda"
 require "tomoto/version"
 module Tomoto

data/lib/tomoto/ct.rb CHANGED

@@ -7,5 +7,18 @@ module Tomoto
       model.instance_variable_set(:@rm_top, rm_top)
       model
     end
+    def correlations(topic_id = nil)
+      prepare
+      if topic_id
+        _correlations(topic_id)
+      else
+        k.times.map { |i| _correlations(i) }
+      end
+    end
+    def prior_cov
+      _prior_cov.each_slice(k).to_a
+    end
   end
 end

data/lib/tomoto/dmr.rb ADDED

@@ -0,0 +1,23 @@
+module Tomoto
+  class DMR
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, sigma: 1.0, alpha_epsilon: 1e-10, seed: nil)
+      model = _new(to_tw(tw), k, alpha, sigma, eta, alpha_epsilon, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, metadata: "")
+      _add_doc(prepare_doc(doc), [metadata])
+    end
+    def lambdas
+      if f == 0
+        []
+      else
+        k.times.map { |i| _lambdas(i) }
+      end
+    end
+  end
+end

data/lib/tomoto/dt.rb ADDED

@@ -0,0 +1,15 @@
+module Tomoto
+  class DT
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, t: 1, alpha_var: 0.1, eta_var: 0.1, phi_var: 0.1, lr_a: 0.01, lr_b: 0.1, lr_c: 0.55) #, seed: nil)
+      model = _new(to_tw(tw), k, t, alpha_var, eta_var, phi_var, lr_a, lr_b, lr_c)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, timepoint: 0)
+      _add_doc(prepare_doc(doc), timepoint)
+    end
+  end
+end

data/lib/tomoto/gdmr.rb ADDED

@@ -0,0 +1,15 @@
+module Tomoto
+  class GDMR
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, degrees: [], alpha: 0.1, eta: 0.01, sigma: 1.0, sigma0: 3.0, alpha_epsilon: 1e-10, seed: nil)
+      model = _new(to_tw(tw), k, degrees, alpha, sigma, sigma0, eta, alpha_epsilon, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, metadata: [])
+      _add_doc(prepare_doc(doc), metadata.map(&:to_s))
+    end
+  end
+end

data/lib/tomoto/hlda.rb ADDED

@@ -0,0 +1,43 @@
+module Tomoto
+  class HLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, depth: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
+      model = _new(to_tw(tw), depth, alpha, eta, gamma, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def children_topics(topic_id)
+      check_topic(topic_id)
+      _children_topics(topic_id)
+    end
+    def level(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id) ? _level(topic_id) : -1
+    end
+    def live_topic?(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id)
+    end
+    def num_docs_of_topic(topic_id)
+      check_topic(topic_id)
+      _num_docs_of_topic(topic_id)
+    end
+    def parent_topic(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id) ? _parent_topic(topic_id) : -1
+    end
+    private
+    def check_topic(topic_id)
+      raise "topic_id must be < K" if topic_id >= k
+      raise "train() should be called first" unless @prepared
+    end
+  end
+end

data/lib/tomoto/hpa.rb ADDED

@@ -0,0 +1,11 @@
+module Tomoto
+  class HPA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+  end
+end

data/lib/tomoto/lda.rb CHANGED

@@ -15,9 +15,7 @@ module Tomoto
     end
     def add_doc(doc)
-      raise "cannot add_doc() after train()" if defined?(@prepared)
-      doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
-      _add_doc(doc)
+      _add_doc(prepare_doc(doc))
     end
     def count_by_topics
@@ -47,6 +45,10 @@ module Tomoto
       _train(iterations, workers)
     end
+    def tw
+      TERM_WEIGHT[_tw]
+    end
     private
     def prepare
@@ -56,6 +58,12 @@ module Tomoto
       end
     end
+    def prepare_doc(doc)
+      raise "cannot add_doc() after train()" if defined?(@prepared)
+      doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
+      doc
+    end
     class << self
       private

data/lib/tomoto/llda.rb ADDED

@@ -0,0 +1,15 @@
+module Tomoto
+  class LLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, labels: [])
+      _add_doc(prepare_doc(doc), labels)
+    end
+  end
+end

data/lib/tomoto/mglda.rb ADDED

@@ -0,0 +1,15 @@
+module Tomoto
+  class MGLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k_g: 1, k_l: 1, t: 3, alpha_g: 0.1, alpha_l: 0.1, alpha_mg: 0.1, alpha_ml: 0.1, eta_g: 0.01) #, eta_l: 0.01, gamma: 0.1, seed: nil)
+      model = _new(to_tw(tw), k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, delimiter: ".")
+      _add_doc(prepare_doc(doc), delimiter)
+    end
+  end
+end

data/lib/tomoto/pa.rb ADDED

@@ -0,0 +1,11 @@
+module Tomoto
+  class PA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+  end
+end

data/lib/tomoto/plda.rb ADDED

@@ -0,0 +1,15 @@
+module Tomoto
+  class PLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, latent_topics: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), latent_topics, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, labels: [])
+      _add_doc(prepare_doc(doc), labels)
+    end
+  end
+end

data/lib/tomoto/slda.rb ADDED

@@ -0,0 +1,37 @@
+module Tomoto
+  class SLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, vars: "", alpha: 0.1, eta: 0.01, mu: [], nu_sq: [], glm_param: [], seed: nil)
+      model = _new(to_tw(tw), k, vars.split("").map { |v| to_glm(v) }, alpha, eta, mu, nu_sq, glm_param, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      model
+    end
+    def add_doc(doc, y: [])
+      _add_doc(prepare_doc(doc), y)
+    end
+    def var_type(var_id)
+      raise "train() should be called first" unless @prepared
+      _var_type(var_id)
+    end
+    private
+    class << self
+      private
+      def to_glm(v)
+        case v
+        when "l"
+          0
+        when "b"
+          1
+        else
+          raise "Invalid var: #{v}"
+        end
+      end
+    end
+  end
+end

data/lib/tomoto/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Tomoto
-  VERSION = "0.1.0"
+  VERSION = "0.1.1"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: tomoto
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-10-09 00:00:00.000000000 Z
+date: 2020-10-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice
@@ -94,8 +94,18 @@ files:
 - ext/tomoto/extconf.rb
 - lib/tomoto.rb
 - lib/tomoto/ct.rb
+- lib/tomoto/dmr.rb
+- lib/tomoto/dt.rb
+- lib/tomoto/gdmr.rb
 - lib/tomoto/hdp.rb
+- lib/tomoto/hlda.rb
+- lib/tomoto/hpa.rb
 - lib/tomoto/lda.rb
+- lib/tomoto/llda.rb
+- lib/tomoto/mglda.rb
+- lib/tomoto/pa.rb
+- lib/tomoto/plda.rb
+- lib/tomoto/slda.rb
 - lib/tomoto/version.rb
 - vendor/EigenRand/EigenRand/Core.h
 - vendor/EigenRand/EigenRand/Dists/Basic.h