machine_learning_workbench 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 59f6c7b552e7165e67222a70e5ceb231bf482d69
-   data.tar.gz: '09e9df7a6e045fadec71456f327f1a8118e31a5d'
+   metadata.gz: baafa2eb71e8bbcc83d6a320437c2a0ad8f5544e
+   data.tar.gz: 5337ad91449767ca2754b41b2784212bfe5fbae7
  SHA512:
-   metadata.gz: 5eae025849e2640b3137128a912cb54ce6f48fad9c27c63462dc08cd787c1a53527c2bcdcd06f0230bc0e7b1b50d8699f3778878ea7331f00a66bbc7ec114baf
-   data.tar.gz: 6aced52a85fd32650bdc88810c596d3305d2f3efeab2034a0ac73e58f3691dda5a4013e89a79546da033cb5ed81833bb090b0b69b7d7ed32e94d46b900bab165
+   metadata.gz: 276d67ea700d5d9a6c0ee450cf0839438c77d7b7ce8dce87173b13a0898d0615cf30cc1f803686cb2da9e76953e601f8e702948853b2332cdffd57f7250ed72f
+   data.tar.gz: 3b9e2fdee7f0435d4acd52b902de928705e021f2e1715b58a09128d69cf129e4aaa7ad8f33d0545a74761d0b2345aa8df6a3ac112b7ebb8c67002431e686ce90
@@ -3,7 +3,6 @@
  # followed by a `bundle install`
  require 'machine_learning_workbench'
  # Workbench shorthands
- WB = MachineLearningWorkbench
  XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
  FFNN = WB::NeuralNetwork::FeedForward
 
@@ -11,7 +10,7 @@ FFNN = WB::NeuralNetwork::FeedForward
  XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
  # A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
  # For other uses, make sure you match the first number to the number of inputs, and
- # the last one as the number of outputs; then add as many layers as needed, by
+ # the last one as the number of outputs; then add as many layers as needed, by
  # specifying the size of each. Here we have only one, of size 2.
  # NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
  # for speed. In the second case, use the function `nweights_per_layer` when instantiating
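To see where these shorthands lead, here is a minimal sketch of wiring the XOR table to a [2,2,1] network and the XNES optimizer. Only `WB`, `XNES`, `FFNN`, `XOR`, and `#train` appear in this diff; the constructor arguments and the `nweights`, `load_weights`, and `activate` calls are assumptions about the gem's API, not lines from this release:

```ruby
require 'machine_learning_workbench'
XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
FFNN = WB::NeuralNetwork::FeedForward

XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
net = FFNN.new [2,2,1] # 2 inputs, 2 hidden, 1 output

# Fitness: negated total error over the truth table (higher is better)
fit = -> (weights) do
  net.load_weights weights
  -XOR.sum { |input, target| (net.activate(input)[0] - target).abs }
end

nes = XNES.new net.nweights, fit, :max
50.times { nes.train }
```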
@@ -2,17 +2,20 @@
  gpu = false # prepare for switching to GPUs
  if gpu
    require 'cumo/narray'
-   require 'cumo/linalg'
    Xumo = Cumo
+   require 'cumo/linalg'
  else
    require 'numo/narray'
+   Xumo = Numo
    # gem `numo-linalg` depends on openblas and lapacke:
    # `sudo apt install libopenblas-base liblapacke`
    require 'numo/linalg'
-   Xumo = Numo
  end
+
+ # Shorthands
  NArray = Xumo::DFloat # set a single data type across the WB for now
  NMath = Xumo::NMath # shorthand for extended math module
+ NLinalg = Xumo::Linalg # shorthand for linear algebra module
 
  module MachineLearningWorkbench
    module Compressor
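The point of the `Xumo` indirection is that everything downstream names only `Xumo`, `NArray`, `NMath`, and now `NLinalg`, so the CPU/GPU choice becomes a one-line switch. A minimal sketch of the same pattern, CPU side only (assumes `numo-narray` is installed):

```ruby
require 'numo/narray'
Xumo = Numo # flip to `Cumo` (plus its requires) for the GPU backend

NArray = Xumo::DFloat # one float type across the codebase
NMath  = Xumo::NMath

# Downstream code never mentions Numo or Cumo directly
m = NArray.new(2, 2).rand # uniform samples in [0, 1)
puts NMath.exp(m).inspect # element-wise exponential
```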
@@ -24,6 +27,7 @@ module MachineLearningWorkbench
    module Tools
    end
  end
+ WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;)
 
  require_relative 'machine_learning_workbench/monkey'
  require_relative 'machine_learning_workbench/tools'
@@ -3,11 +3,14 @@ module MachineLearningWorkbench::Compressor
  # Optimized for online training.
  class OnlineVectorQuantization < VectorQuantization
 
-   attr_reader :min_lrate
+   attr_reader :lrate_min, :lrate_min_den, :decay_rate
 
-   def initialize min_lrate: 0.01, **opts
+   def initialize **opts
+     puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate]
+     @lrate_min = opts.delete(:lrate_min) || 0.001
+     @lrate_min_den = opts.delete(:lrate_min_den) || 1
+     @decay_rate = opts.delete(:decay_rate) || 1
      super **opts.merge({lrate: nil})
-     @min_lrate = min_lrate
    end
 
    # Overloading lrate check from original VQ
@@ -17,12 +20,17 @@ module MachineLearningWorkbench::Compressor
    # @param centr_idx [Integer] index of the centroid
    # @param lower_bound [Float] minimum learning rate
    # @note nicely overloads the `attr_reader` of parent class
-   def lrate centr_idx, lower_bound: min_lrate
-     [1/ntrains[centr_idx], lower_bound].max
+   def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate
+     [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max
+       .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" }
    end
 
-   def train_one *args, **kwargs
-     raise NotImplementedError, "Remember to overload this using the new lrate(idx)"
+   # Train on one vector
+   # @return [Integer] index of trained centroid
+   def train_one vec
+     trg_idx, _simil = most_similar_centr(vec)
+     centrs[trg_idx] = centrs[trg_idx] * (1-lrate(trg_idx)) + vec * lrate(trg_idx)
+     trg_idx
    end
 
  end
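The rewritten `lrate` is a harmonic decay per centroid: `max(1.0/(ntrains*decay + min_den), lrate_min)`, so a centroid's first update counts fully and later updates shrink toward the `lrate_min` floor. A plain-Ruby sketch of the schedule with the defaults above:

```ruby
lrate_min     = 0.001 # default lower bound
lrate_min_den = 1     # default denominator offset
decay_rate    = 1     # default decay multiplier

lrate = -> (ntrains) do
  [1.0 / (ntrains * decay_rate + lrate_min_den), lrate_min].max
end

[0, 1, 9, 99, 1999].each { |n| puts "ntrains=#{n} lrate=#{lrate.(n)}" }
# => 1.0, 0.5, 0.1, 0.01, then 0.001 (the floor kicks in once 1/(n+1) < lrate_min)
```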
@@ -2,15 +2,18 @@ module MachineLearningWorkbench::Compressor
 
  # Standard Vector Quantization
  class VectorQuantization
-   attr_reader :ncentrs, :centrs, :dims, :vrange, :lrate, :rng, :ntrains
+   attr_reader :ncentrs, :centrs, :dims, :vrange, :init_centr_vrange, :lrate, :simil_type, :rng, :ntrains
    Verification = MachineLearningWorkbench::Tools::Verification
 
-   def initialize ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed
+   def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, init_centr_vrange: nil, rseed: Random.new_seed
+     # TODO: RNG CURRENTLY NOT USED!!
      @rng = Random.new rseed
      @ncentrs = ncentrs
      @dims = Array(dims)
      check_lrate lrate # hack: so that we can overload it in online_vq
      @lrate = lrate
+     @simil_type = simil_type || :dot
+     @init_centr_vrange ||= vrange
      @vrange = case vrange
        when Array
          raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
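Since the new keyword signature is fully visible above, instantiation at 0.5.0 looks as follows (values are arbitrary; `WB` is the alias the library now defines):

```ruby
require 'machine_learning_workbench'

vq = WB::Compressor::VectorQuantization.new(
  ncentrs: 4,       # number of centroids
  dims: [8, 8],     # shape of each centroid
  vrange: [0, 1],   # value range, also the default init_centr_vrange
  lrate: 0.3,       # fixed learning rate, between 0 and 1
  simil_type: :mse) # new in 0.5.0; defaults to :dot
```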
@@ -19,30 +22,48 @@ module MachineLearningWorkbench::Compressor
          [vrange.first, vrange.last].map &method(:Float)
        else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
      end
-     @centrs = ncentrs.times.map { new_centr }
+     init_centrs
      @ntrains = [0]*ncentrs # useful to understand what happens
    end
 
    # Verify lrate to be present and withing unit bounds
-   # As a separate method only so it can be overloaded in online_vq
+   # As a separate method only so it can be overloaded in `OnlineVectorQuantization`
    def check_lrate lrate
      raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1)
    end
 
+   # Initializes a list of centroids
+   def init_centrs nc: ncentrs, base: nil, proport: nil
+     @centrs = nc.times.map { new_centr base, proport }
+   end
+
    # Creates a new (random) centroid
-   def new_centr
-     NArray.new(*dims).rand(*vrange)
+   # If a base is passed, this is meshed with the random centroid.
+   # This is done to facilitate distributing the training across centroids.
+   # TODO: USE RNG HERE!!
+   def new_centr base=nil, proport=nil
+     raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil?
+     # require 'pry'; binding.pry if base.nil? ^ proport.nil?
+     ret = NArray.new(*dims).rand(*init_centr_vrange)
+     ret = ret * (1-proport) + base * proport if base&&proport
+     ret
    end
 
+   SIMIL = {
+     dot: -> (centr, vec) { centr.dot(vec) },
+     mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size }
+   }
+
    # Computes similarities between vector and all centroids
-   def similarities vec
+   def similarities vec, type: simil_type
      raise NotImplementedError if vec.shape.size > 1
-     centrs.map { |c| c.dot(vec) }
+     centrs.map { |centr| SIMIL[type].call centr, vec }
      # require 'parallel'
      # Parallel.map(centrs) { |c| c.dot(vec).first }
    end
 
    # Encode a vector
+   # TODO: optimize for Numo
    def encode vec, type: :most_similar
      simils = similarities vec
      case type
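The `SIMIL` table makes the similarity measure pluggable: `:dot` keeps the old inner-product behavior, while `:mse` returns the negated mean squared error so that higher still means more similar (0 for a perfect match). The same two lambdas, standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

simil = {
  dot: -> (centr, vec) { centr.dot(vec) },
  mse: -> (centr, vec) { -((centr - vec)**2).sum / centr.size }
}

centr = Numo::DFloat[0.2, 0.8, 0.5]
vec   = Numo::DFloat[0.1, 0.9, 0.4]

puts simil[:dot].call(centr, vec) # ≈ 0.94
puts simil[:mse].call(centr, vec) # ≈ -0.01
```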
@@ -52,6 +73,7 @@ module MachineLearningWorkbench::Compressor
        simils
      when :ensemble_norm
        tot = simils.reduce(:+)
+       tot = 1 if tot == 0 # HACK: avoid division by zero
        simils.map { |s| s/tot }
      else raise ArgumentError, "unrecognized encode type: #{type}"
      end
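The guard exists because the similarities can legitimately sum to zero (e.g. `:dot` similarities against a zero vector); dividing by 1 instead returns zeros rather than NaNs. In isolation:

```ruby
simils = [0.0, 0.0, 0.0]     # e.g. `:dot` against a zero vector
tot = simils.reduce(:+)
tot = 1 if tot == 0          # the HACK above
p simils.map { |s| s / tot } # => [0.0, 0.0, 0.0] instead of NaNs
```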
@@ -83,19 +105,18 @@ module MachineLearningWorkbench::Compressor
 
    # Per-pixel errors in reconstructing vector
    # @return [NArray] residuals
-   def reconstr_error vec
-     reconstruction(vec) - vec
+   def reconstr_error vec, code: nil, type: :most_similar
+     code ||= encode vec, type: type
+     (vec - reconstruction(code, type: type)).abs.sum
    end
 
    # Train on one vector
    # @return [Integer] index of trained centroid
    def train_one vec
-
      trg_idx, _simil = most_similar_centr(vec)
-     # note: uhm that actually looks like a dot product... optimizable?
+     # note: uhm that actually looks like a dot product... maybe faster?
      # `[c[i], vec].dot([1-lrate, lrate])`
      centrs[trg_idx] = centrs[trg_idx] * (1-lrate) + vec * lrate
-     # Verification.in_range! centrs[trg_idx], vrange # I verified it's not needed
      trg_idx
    end
 
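The training update is a convex blend: the winning centroid moves a fraction `lrate` of the way toward the training vector (an exponential moving average when applied repeatedly). Worked out on small numbers:

```ruby
require 'numo/narray'

lrate = 0.3
centr = Numo::DFloat[0.0, 1.0]
vec   = Numo::DFloat[1.0, 0.0]

centr = centr * (1 - lrate) + vec * lrate
p centr.to_a # => [0.3, 0.7]
```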
@@ -231,7 +231,7 @@ module MachineLearningWorkbench::Monkey
    # @param other [NArray] other matrix
    # @return [NArray]
    def outer_flat other
-     # TODO: Numo::NArray should be able to implement this with `#outer` and some other
+     # TODO: Xumo::NArray should be able to implement this with `#outer` and some other
      # function to flatten the right layer -- much faster
      raise ArgumentError, "Need to pass an operand block" unless block_given?
      self.class.zeros([self.size, other.size]).tap do |ret|
@@ -258,7 +258,7 @@ module MachineLearningWorkbench::Monkey
    # Inverses matrix
    # @return [NArray]
    def invert
-     Numo::Linalg.inv self
+     NLinalg.inv self
    end
  end
 
@@ -268,7 +268,7 @@ module MachineLearningWorkbench::Monkey
    def exponential
      raise ArgumentError if ndim > 2
      # special case: one-dimensional matrix: just exponentiate the values
-     return Numo::NMath.exp(self) if (ndim == 1) || shape.include?(1)
+     return NMath.exp(self) if (ndim == 1) || shape.include?(1)
      # at this point we need to validate it is a square matrix
      raise ArgumentError unless shape.reduce(&:==)
 
@@ -281,11 +281,11 @@ module MachineLearningWorkbench::Monkey
      # TODO: this is a simple but outdated method, switch to Pade approximation
      # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557
 
-     # e_values, l_e_vectors, r_e_vectors_t = Numo::Linalg.svd self
-     evals, _wi, _vl, r_evecs = Numo::Linalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
+     # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self
+     evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
      r_evecs_t = r_evecs#.transpose
      r_evecs_inv = r_evecs_t.invert
-     evals_exp_dmat = Numo::NMath.exp(evals).diag
+     evals_exp_dmat = NMath.exp(evals).diag
 
      # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose
      r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv)
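What this hunk computes is the classic eigendecomposition route to the matrix exponential: if A = V·diag(λ)·V⁻¹ then exp(A) = V·diag(e^λ)·V⁻¹. A sanity-check sketch on a matrix that is already diagonal, reusing the same `Lapack.call(:geev, ...)` and `diag` calls as above (assumes `numo-linalg`); the result must carry e^λ on the diagonal:

```ruby
require 'numo/narray'
require 'numo/linalg'

a = Numo::DFloat[[1, 0], [0, 2]]
evals, _wi, _vl, r_evecs =
  Numo::Linalg::Lapack.call(:geev, a, jobvl: false, jobvr: true)
exp_a = r_evecs.dot(Numo::NMath.exp(evals).diag)
               .dot(Numo::Linalg.inv(r_evecs))
p exp_a.to_a # ≈ [[2.718, 0.0], [0.0, 7.389]]
```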
@@ -157,7 +157,7 @@ module MachineLearningWorkbench::NeuralNetwork
    # Extract and convert the output layer's activation
    # @return [NArray] the activation of the output layer
    def out
-     state.last
+     state.last.flatten
    end
 
    ## Activation functions
@@ -166,13 +166,13 @@ module MachineLearningWorkbench::NeuralNetwork
    def sigmoid k=0.5
      # k is steepness: 0<k<1 is flatter, 1<k is flatter
      # flatter makes activation less sensitive, better with large number of inputs
-     -> (x) { 1.0 / (Numo::NMath.exp(-k * x) + 1.0) }
+     -> (x) { 1.0 / (NMath.exp(-k * x) + 1.0) }
    end
 
    # Traditional logistic
    def logistic
      -> (x) do
-       exp = Numo::NMath.exp(x)
+       exp = NMath.exp(x)
        # exp.infinite? ? exp : exp / (1.0 + exp)
        exp / (1.0 + exp)
      end
@@ -181,7 +181,7 @@ module MachineLearningWorkbench::NeuralNetwork
    # LeCun hyperbolic activation
    # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4
    def lecun_hyperbolic
-     -> (x) { 1.7159 * Numo::NMath.tanh(2.0*x/3.0) + 1e-3*x }
+     -> (x) { 1.7159 * NMath.tanh(2.0*x/3.0) + 1e-3*x }
    end
 
    # Rectified Linear Unit (ReLU)
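Each activation is a lambda over a whole NArray, so one call maps an entire layer at once. The same formulas evaluated standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

k = 0.5
sigmoid = -> (x) { 1.0 / (Numo::NMath.exp(-k * x) + 1.0) }
lecun   = -> (x) { 1.7159 * Numo::NMath.tanh(2.0 * x / 3.0) + 1e-3 * x }

x = Numo::DFloat[-2, 0, 2]
p sigmoid.(x).to_a # ≈ [0.269, 0.5, 0.731]
p lecun.(x).to_a   # ≈ [-1.495, 0.0, 1.495]
```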
@@ -35,7 +35,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
 
    # Box-Muller transform: generates standard (unit) normal distribution samples
    # @return [Float] a single sample from a standard normal distribution
-   # @note Numo::NArray implements this :) glad to have switched!
+   # @note Xumo::NArray implements this :) glad to have switched!
    def standard_normal_sample
      rho = Math.sqrt(-2.0 * Math.log(rng.rand))
      theta = 2 * Math::PI * rng.rand
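The hunk cuts off before the method's final expression, but the Box-Muller transform itself is standard: two independent uniforms give a radius rho = sqrt(-2 ln u1) and an angle theta = 2π·u2, and rho·cos(theta) is distributed as a standard normal. A self-contained version (the closing line is the textbook formula, not necessarily the gem's verbatim code):

```ruby
rng = Random.new(42)

box_muller = -> do
  rho   = Math.sqrt(-2.0 * Math.log(rng.rand))
  theta = 2 * Math::PI * rng.rand
  rho * Math.cos(theta) # a single standard normal sample
end

samples = Array.new(10_000) { box_muller.() }
mean = samples.sum / samples.size
puts "mean ≈ #{mean.round(3)}" # close to 0
```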
@@ -80,7 +80,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    # Samples a standard normal distribution to construct a NArray of
    # popsize multivariate samples of length ndims
    # @return [NArray] standard normal samples
-   # @note Numo::NArray implements this :) glad to have switched!
+   # @note Xumo::NArray implements this :) glad to have switched!
    def standard_normal_samples
      NArray.zeros([popsize, ndims]).tap do |ret|
        ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample }
@@ -104,7 +104,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization.
    # @return standard normal samples sorted by the respective individuals' fitnesses
    def sorted_inds
-     # samples = standard_normal_samples # Numo::NArray implements the Box-Muller :)
+     # samples = standard_normal_samples # Xumo::NArray implements the Box-Muller :)
      samples = NArray.new([popsize, ndims]).rand_norm(0,1)
      inds = move_inds(samples)
      fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
@@ -28,7 +28,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    def train picks: sorted_inds
      g_mu = utils.dot(picks)
      # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar
-     row_norms = Numo::Linalg.norm picks, 2, axis:1
+     row_norms = NLinalg.norm picks, 2, axis:1
      g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar
      @mu += sigma.dot(g_mu.transpose).transpose * lrate
      @variance *= Math.exp(g_sigma * lrate / 2)
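`NLinalg.norm` with `axis: 1` returns one Euclidean norm per row, which is what the sigma gradient needs (`row_norms**2` is then the squared length of each sample). The call in isolation (assumes `numo-linalg`):

```ruby
require 'numo/narray'
require 'numo/linalg'

picks = Numo::DFloat[[3, 4],
                     [0, 1]]
row_norms = Numo::Linalg.norm picks, 2, axis: 1
p row_norms.to_a # => [5.0, 1.0]
```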
@@ -24,7 +24,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
        raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
      end
      # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi)
-     @log_sigma = Numo::NMath.log(sigma.diagonal).diag
+     @log_sigma = NMath.log(sigma.diagonal).diag
    end
 
    def train picks: sorted_inds
@@ -7,10 +7,11 @@ module MachineLearningWorkbench::Tools
    # @param narr [NArray] numeric matrix to display
    # @param shape [Array<Integer>] optional reshaping
    def self.narr_to_img narr, shape: nil
+     require 'rmagick'
      shape ||= narr.shape
      shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1
      # `Image::constitute` requires Float pixels to be in [0,1]
-     pixels = Norm.feature_scaling narr, to: [0,1]
+     pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1]
      Magick::Image.constitute *shape, "I", pixels.to_a.flatten
    end
 
@@ -28,6 +29,7 @@ module MachineLearningWorkbench::Tools
    # @param shape [Array] the true shape of the image (numeric matrix could be flattened)
    # @param in_fork [bool] whether to execute the display in fork (and continue running)
    def self.display narr, disp_size: nil, shape: nil, in_fork: true
+     require 'rmagick'
      img = narr_to_img narr, shape: shape
      img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size
      if in_fork
@@ -43,6 +45,7 @@ module MachineLearningWorkbench::Tools
    # @param flat [bool] whether to return a flat array
    # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection
    def self.narr_from_png fname, scale: nil, flat: false
+     require 'rmagick'
      img = Magick::ImageList.new(fname).first
      img.scale!(scale) if scale
      shape = [img.columns, img.rows]
@@ -4,7 +4,10 @@ module MachineLearningWorkbench::Tools
      from ||= narr.minmax
      old_min, old_max = from
      new_min, new_max = to
-     (narr-old_min)*(new_max-new_min)/(old_max-old_min)+new_min
+     ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min
+   rescue ZeroDivisionError
+     # require 'pry'; binding.pry
+     raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`"
    end
 
    # @param per_column [bool] wheather to compute stats per-column or matrix-wise
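`feature_scaling` is plain min-max normalization, x' = (x - min)·(new_max - new_min)/(max - min) + new_min; the new parentheses only make the precedence explicit. The formula standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

narr = Numo::DFloat[2, 4, 6, 10]
old_min, old_max = narr.minmax
new_min, new_max = 0.0, 1.0

scaled = (narr - old_min) * (new_max - new_min) / (old_max - old_min) + new_min
p scaled.to_a # => [0.0, 0.25, 0.5, 1.0]
```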
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: machine_learning_workbench
  version: !ruby/object:Gem::Version
-   version: 0.4.5
+   version: 0.5.0
  platform: ruby
  authors:
  - Giuseppe Cuccu
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-27 00:00:00.000000000 Z
+ date: 2018-04-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler