RubyGems - tomoto - Versions diffs - 0.3.0-x86_64-linux - Mend

tomoto 0.3.0-x86_64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +45 -0
data/LICENSE.txt +22 -0
data/README.md +162 -0
data/ext/tomoto/ct.cpp +58 -0
data/ext/tomoto/dmr.cpp +69 -0
data/ext/tomoto/dt.cpp +91 -0
data/ext/tomoto/extconf.rb +34 -0
data/ext/tomoto/gdmr.cpp +42 -0
data/ext/tomoto/hdp.cpp +47 -0
data/ext/tomoto/hlda.cpp +71 -0
data/ext/tomoto/hpa.cpp +32 -0
data/ext/tomoto/lda.cpp +281 -0
data/ext/tomoto/llda.cpp +33 -0
data/ext/tomoto/mglda.cpp +81 -0
data/ext/tomoto/pa.cpp +32 -0
data/ext/tomoto/plda.cpp +33 -0
data/ext/tomoto/slda.cpp +48 -0
data/ext/tomoto/tomoto.cpp +48 -0
data/ext/tomoto/utils.h +30 -0
data/lib/tomoto/2.7/tomoto.so +0 -0
data/lib/tomoto/3.0/tomoto.so +0 -0
data/lib/tomoto/3.1/tomoto.so +0 -0
data/lib/tomoto/ct.rb +24 -0
data/lib/tomoto/dmr.rb +27 -0
data/lib/tomoto/dt.rb +15 -0
data/lib/tomoto/gdmr.rb +15 -0
data/lib/tomoto/hdp.rb +11 -0
data/lib/tomoto/hlda.rb +56 -0
data/lib/tomoto/hpa.rb +11 -0
data/lib/tomoto/lda.rb +181 -0
data/lib/tomoto/llda.rb +15 -0
data/lib/tomoto/mglda.rb +15 -0
data/lib/tomoto/pa.rb +11 -0
data/lib/tomoto/plda.rb +15 -0
data/lib/tomoto/slda.rb +37 -0
data/lib/tomoto/version.rb +3 -0
data/lib/tomoto.rb +27 -0
data/vendor/EigenRand/EigenRand/EigenRand +24 -0
data/vendor/EigenRand/LICENSE +21 -0
data/vendor/EigenRand/README.md +426 -0
data/vendor/eigen/COPYING.APACHE +203 -0
data/vendor/eigen/COPYING.BSD +26 -0
data/vendor/eigen/COPYING.GPL +674 -0
data/vendor/eigen/COPYING.LGPL +502 -0
data/vendor/eigen/COPYING.MINPACK +51 -0
data/vendor/eigen/COPYING.MPL2 +373 -0
data/vendor/eigen/COPYING.README +18 -0
data/vendor/eigen/Eigen/Cholesky +45 -0
data/vendor/eigen/Eigen/CholmodSupport +48 -0
data/vendor/eigen/Eigen/Core +384 -0
data/vendor/eigen/Eigen/Dense +7 -0
data/vendor/eigen/Eigen/Eigen +2 -0
data/vendor/eigen/Eigen/Eigenvalues +60 -0
data/vendor/eigen/Eigen/Geometry +59 -0
data/vendor/eigen/Eigen/Householder +29 -0
data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
data/vendor/eigen/Eigen/Jacobi +32 -0
data/vendor/eigen/Eigen/KLUSupport +41 -0
data/vendor/eigen/Eigen/LU +47 -0
data/vendor/eigen/Eigen/MetisSupport +35 -0
data/vendor/eigen/Eigen/OrderingMethods +70 -0
data/vendor/eigen/Eigen/PaStiXSupport +49 -0
data/vendor/eigen/Eigen/PardisoSupport +35 -0
data/vendor/eigen/Eigen/QR +50 -0
data/vendor/eigen/Eigen/QtAlignedMalloc +39 -0
data/vendor/eigen/Eigen/SPQRSupport +34 -0
data/vendor/eigen/Eigen/SVD +50 -0
data/vendor/eigen/Eigen/Sparse +34 -0
data/vendor/eigen/Eigen/SparseCholesky +37 -0
data/vendor/eigen/Eigen/SparseCore +69 -0
data/vendor/eigen/Eigen/SparseLU +50 -0
data/vendor/eigen/Eigen/SparseQR +36 -0
data/vendor/eigen/Eigen/StdDeque +27 -0
data/vendor/eigen/Eigen/StdList +26 -0
data/vendor/eigen/Eigen/StdVector +27 -0
data/vendor/eigen/Eigen/SuperLUSupport +64 -0
data/vendor/eigen/Eigen/UmfPackSupport +40 -0
data/vendor/eigen/README.md +5 -0
data/vendor/eigen/bench/README.txt +55 -0
data/vendor/eigen/bench/btl/COPYING +340 -0
data/vendor/eigen/bench/btl/README +154 -0
data/vendor/eigen/bench/tensors/README +20 -0
data/vendor/eigen/blas/README.txt +6 -0
data/vendor/eigen/ci/README.md +56 -0
data/vendor/eigen/demos/mandelbrot/README +10 -0
data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
data/vendor/eigen/demos/opengl/README +13 -0
data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1815 -0
data/vendor/eigen/unsupported/README.txt +50 -0
data/vendor/tomotopy/LICENSE +21 -0
data/vendor/tomotopy/README.kr.rst +512 -0
data/vendor/tomotopy/README.rst +516 -0
data/vendor/variant/LICENSE +25 -0
data/vendor/variant/LICENSE_1_0.txt +23 -0
data/vendor/variant/README.md +102 -0
metadata +140 -0

data/lib/tomoto/hlda.rb ADDED Viewed

@@ -0,0 +1,56 @@
+module Tomoto
+  class HLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, depth: 2, alpha: 0.1, eta: 0.01, gamma: 0.1, seed: nil)
+      model = _new(to_tw(tw), depth, alpha, eta, gamma, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def children_topics(topic_id)
+      check_topic(topic_id)
+      _children_topics(topic_id)
+    end
+    def level(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id) ? _level(topic_id) : -1
+    end
+    def live_topic?(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id)
+    end
+    def num_docs_of_topic(topic_id)
+      check_topic(topic_id)
+      _num_docs_of_topic(topic_id)
+    end
+    def parent_topic(topic_id)
+      check_topic(topic_id)
+      _live_topic?(topic_id) ? _parent_topic(topic_id) : -1
+    end
+    private
+    def check_topic(topic_id)
+      raise "topic_id must be < K" if topic_id >= k
+      raise "train() should be called first" unless @prepared
+    end
+    def topics_info(summary, topic_word_top_n:)
+      counts = count_by_topics
+      nested_info = lambda do |k = 0, level = 0|
+        words = topic_words(k, top_n: topic_word_top_n).keys.join(" ")
+        summary << "| #{"  " * level}##{k} (#{counts[k]}) : #{words}"
+        children_topics(k).sort.each do |c|
+          nested_info.call(c, level + 1)
+        end
+      end
+      nested_info.call
+    end
+  end
+end

data/lib/tomoto/hpa.rb ADDED Viewed

@@ -0,0 +1,11 @@
+module Tomoto
+  class HPA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+  end
+end

data/lib/tomoto/lda.rb ADDED Viewed

@@ -0,0 +1,181 @@
+module Tomoto
+  class LDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def self.load(filename)
+      model = new
+      model._load(filename)
+      model
+    end
+    def add_doc(doc)
+      _add_doc(prepare_doc(doc))
+    end
+    def make_doc(doc)
+      _make_doc(tokenize_doc(doc))
+    end
+    # TODO support multiple docs
+    def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
+      raise "cannot infer with untrained model" unless defined?(@prepared)
+      _infer(doc, iter, tolerance, workers, to_ps(parallel), together)
+    end
+    def count_by_topics
+      prepare
+      _count_by_topics
+    end
+    def removed_top_words
+      prepare
+      _removed_top_words(@rm_top)
+    end
+    def save(filename, full: true)
+      _save(filename, full)
+    end
+    # returns string instead of printing
+    def summary(initial_hp: true, params: true, topic_word_top_n: 5)
+      summary = []
+      summary << "<Basic Info>"
+      basic_info(summary)
+      summary << "|"
+      summary << "<Training Info>"
+      training_info(summary)
+      summary << "|"
+      if initial_hp
+        summary << "<Initial Parameters>"
+        initial_params_info(summary)
+        summary << "|"
+      end
+      if params
+        summary << "<Parameters>"
+        params_info(summary)
+        summary << "|"
+      end
+      if topic_word_top_n > 0
+        summary << "<Topics>"
+        topics_info(summary, topic_word_top_n: topic_word_top_n)
+        summary << "|"
+      end
+      # skip ending |
+      summary.pop
+      summary.join("\n")
+    end
+    def topic_words(topic_id = nil, top_n: 10)
+      if topic_id
+        _topic_words(topic_id, top_n)
+      else
+        k.times.map { |i| _topic_words(i, top_n) }
+      end
+    end
+    def train(iterations = 10, workers: 0, parallel: :default)
+      prepare
+      _train(iterations, workers, to_ps(parallel))
+    end
+    def tw
+      TERM_WEIGHT[_tw]
+    end
+    private
+    def prepare
+      unless defined?(@prepared)
+        _prepare(@min_cf, @min_df, @rm_top)
+        @prepared = true
+      end
+    end
+    def prepare_doc(doc)
+      raise "cannot add_doc() after train()" if defined?(@prepared)
+      tokenize_doc(doc)
+    end
+    def tokenize_doc(doc)
+      doc = doc.split(/[[:space:]]+/) unless doc.is_a?(Array)
+      doc
+    end
+    def basic_info(summary)
+      sum = used_vocab_freq.sum.to_f
+      mapped = used_vocab_freq.map { |v| v / sum }
+      entropy = mapped.map { |v| v * Math.log(v) }.sum
+      summary << "| #{self.class.name.sub("Tomoto::", "")} (current version: #{VERSION})"
+      summary << "| #{num_docs} docs, #{num_words} words"
+      summary << "| Total Vocabs: #{vocabs.size}, Used Vocabs: #{used_vocabs.size}"
+      summary << "| Entropy of words: %.5f" % entropy
+      summary << "| Removed Vocabs: #{removed_top_words.any? ? removed_top_words.join(" ") : "<NA>"}"
+    end
+    def training_info(summary)
+      summary << "| Iterations: #{global_step}, Burn-in steps: #{burn_in}"
+      summary << "| Optimization Interval: #{optim_interval}"
+      summary << "| Log-likelihood per word: %.5f" % ll_per_word
+    end
+    def initial_params_info(summary)
+      if defined?(@init_params)
+        @init_params.each do |k, v|
+          summary << "| #{k}: #{v}"
+        end
+      else
+        summary << "| Not Available"
+      end
+    end
+    def params_info(summary)
+      summary << "| alpha (Dirichlet prior on the per-document topic distributions)"
+      summary << "|  #{alpha}"
+      summary << "| eta (Dirichlet prior on the per-topic word distribution)"
+      summary << "|  %.5f" % eta
+    end
+    def topics_info(summary, topic_word_top_n:)
+      counts = count_by_topics
+      topic_words(top_n: topic_word_top_n).each_with_index do |words, i|
+        summary << "| ##{i} (#{counts[i]}) : #{words.keys.join(" ")}"
+      end
+    end
+    def to_ps(ps)
+      PARALLEL_SCHEME.index(ps) || (raise ArgumentError, "Invalid parallel scheme: #{ps}")
+    end
+    class << self
+      private
+      def to_tw(tw)
+        TERM_WEIGHT.index(tw) || (raise ArgumentError, "Invalid tw: #{tw}")
+      end
+      def init_params(model, binding)
+        init_params = {}
+        method(:new).parameters.each do |v|
+          next if v[0] != :key
+          init_params[v[1]] = binding.local_variable_get(v[1]).inspect
+        end
+        model.instance_variable_set(:@init_params, init_params)
+        model
+      end
+    end
+  end
+end

data/lib/tomoto/llda.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Tomoto
+  class LLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def add_doc(doc, labels: [])
+      _add_doc(prepare_doc(doc), labels)
+    end
+  end
+end

data/lib/tomoto/mglda.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Tomoto
+  class MGLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k_g: 1, k_l: 1, t: 3, alpha_g: 0.1, alpha_l: 0.1, alpha_mg: 0.1, alpha_ml: 0.1, eta_g: 0.01) #, eta_l: 0.01, gamma: 0.1, seed: nil)
+      model = _new(to_tw(tw), k_g, k_l, t, alpha_g, alpha_l, alpha_mg, alpha_ml, eta_g)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def add_doc(doc, delimiter: ".")
+      _add_doc(prepare_doc(doc), delimiter)
+    end
+  end
+end

data/lib/tomoto/pa.rb ADDED Viewed

@@ -0,0 +1,11 @@
+module Tomoto
+  class PA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k1: 1, k2: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), k1, k2, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+  end
+end

data/lib/tomoto/plda.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Tomoto
+  class PLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, latent_topics: 1, alpha: 0.1, eta: 0.01, seed: nil)
+      model = _new(to_tw(tw), latent_topics, alpha, eta, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def add_doc(doc, labels: [])
+      _add_doc(prepare_doc(doc), labels)
+    end
+  end
+end

data/lib/tomoto/slda.rb ADDED Viewed

@@ -0,0 +1,37 @@
+module Tomoto
+  class SLDA
+    def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, vars: "", alpha: 0.1, eta: 0.01, mu: [], nu_sq: [], glm_param: [], seed: nil)
+      model = _new(to_tw(tw), k, vars.split("").map { |v| to_glm(v) }, alpha, eta, mu, nu_sq, glm_param, seed || -1)
+      model.instance_variable_set(:@min_cf, min_cf)
+      model.instance_variable_set(:@min_df, min_df)
+      model.instance_variable_set(:@rm_top, rm_top)
+      init_params(model, binding)
+    end
+    def add_doc(doc, y: [])
+      _add_doc(prepare_doc(doc), y)
+    end
+    def var_type(var_id)
+      raise "train() should be called first" unless @prepared
+      _var_type(var_id)
+    end
+    private
+    class << self
+      private
+      def to_glm(v)
+        case v
+        when "l"
+          0
+        when "b"
+          1
+        else
+          raise "Invalid var: #{v}"
+        end
+      end
+    end
+  end
+end

data/lib/tomoto/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Tomoto
+  VERSION = "0.3.0" # gem version; printed in the first "<Basic Info>" line of LDA#summary
+end

data/lib/tomoto.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# ext
+begin
+  require "tomoto/#{RUBY_VERSION.to_f}/tomoto"
+rescue LoadError
+  require "tomoto/tomoto"
+end
+# modules
+require "tomoto/ct"
+require "tomoto/dmr"
+require "tomoto/dt"
+require "tomoto/gdmr"
+require "tomoto/hdp"
+require "tomoto/hlda"
+require "tomoto/hpa"
+require "tomoto/lda"
+require "tomoto/llda"
+require "tomoto/mglda"
+require "tomoto/pa"
+require "tomoto/plda"
+require "tomoto/slda"
+require "tomoto/version"
+module Tomoto
+  PARALLEL_SCHEME = [:default, :none, :copy_merge, :partition]
+  TERM_WEIGHT = [:one, :idf, :pmi]
+end

data/vendor/EigenRand/EigenRand/EigenRand ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * @file EigenRand
+ * @author bab2min (bab2min@gmail.com)
+ * @brief
+ * @version 0.4.0
+ * @date 2021-09-17
+ *
+ * @copyright Copyright (c) 2020-2021
+ *
+ */
+#ifndef EIGENRAND_EIGENRAND_H
+#define EIGENRAND_EIGENRAND_H
+#include <Eigen/Dense>
+#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+#include "Macro.h"
+#include "Core.h"
+#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
+#endif

data/vendor/EigenRand/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2020, bab2min
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.