RubyGems - machine_learning_workbench - Versions diffs - 0.6.1 → 0.7.0 - Mend

machine_learning_workbench 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: aa6944de3c6c7e7ef318e456aaf2479ff577773b
-  data.tar.gz: dce74b8e349c4f15e6a65c32805e8fa6f7e95253
+  metadata.gz: 8897ba173dbfa944cf55b3ca7b57eb3af87bbff7
+  data.tar.gz: 44883310f216b187d5d3ccce669e85f946e6ee5f
 SHA512:
-  metadata.gz: 86858d8e37e499ad296476a92afbaf59ecee6edf9c499c8d1e786ec25d39d676f5852f9faa7f14ed5afeac8f9b8d92a3fd478adb88d85d5d03acb3ca9ac804c6
-  data.tar.gz: 5bd9161716409f4470d4b5d11aad78c9cf907c2acde8486928d3622f175c20f7fb608e27d33b7ede9fcb2ab9339fc95d8d0f6f4d6940c029793e7a854cfa770d
+  metadata.gz: 75d8a1f4d2087746dae316ca47c07925858826bdf393eddde7bb2f82e22b47e2d9c2c6bbaa6e0ce10fea1ccb0b9df1882e80b58519a2107f59732d6f99ea1a76
+  data.tar.gz: d4945335adc99edaabd26b56ac7bb49936d0642d58a09516ac63fae27e11d871db879e12db9cc3d0ec78788363c85c29558d382deb145713fa4051985e330f28

data/.gitignore CHANGED

@@ -7,6 +7,6 @@
 /spec/reports/
 /tmp/
 Gemfile.lock
+/stats/
 # rspec failure tracking
 .rspec_status

data/Rakefile CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require "bundler/gem_tasks"
 require "rspec/core/rake_task"

data/examples/image_compression.rb CHANGED

@@ -1,3 +1,7 @@
+# frozen_string_literal: true
+# Run as: `bundle exec ruby examples/image_compression.rb`
 require 'rmagick'
 require 'machine_learning_workbench'
 VectorQuantization = MachineLearningWorkbench::Compressor::VectorQuantization

data/examples/neuroevolution.rb CHANGED

@@ -1,3 +1,7 @@
+# frozen_string_literal: true
+# Run as: `bundle exec ruby examples/neuroevolution.rb`
 # Make sure the gem is installed first with `gem install machine_learning_workbench`
 # Alternatively, add `gem 'machine_learning_workbench'` to your Gemfile if using Bundler,
 # followed by a `bundle install`

data/lib/machine_learning_workbench.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 gpu = false             # prepare for switching to GPUs
 if gpu

data/lib/machine_learning_workbench/compressor.rb CHANGED

@@ -1,3 +1,6 @@
+# frozen_string_literal: true
 require_relative 'compressor/vector_quantization'
 require_relative 'compressor/decaying_learning_rate_vq'
 require_relative 'compressor/copy_vq'
+require_relative 'compressor/incr_dict_vq'

data/lib/machine_learning_workbench/compressor/copy_vq.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Compressor
   # Train-less VQ, copying new images into centroids
   # Optimized for online training.

data/lib/machine_learning_workbench/compressor/decaying_learning_rate_vq.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Compressor
   # VQ with per-centroid decaying learning rates.
   # Optimized for online training.

data/lib/machine_learning_workbench/compressor/incr_dict_vq.rb ADDED

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+module MachineLearningWorkbench::Compressor
+  # Incremental Dictionary Train-less VQ: rather than training existing centroids,
+  # it appends the input vector as a new centroid whenever the closest existing
+  # centroid's mean squared error is not below a threshold (`equal_simil`).
+  # Optimized for online training.
+  # TODO: as the deadline grows nigh, the hacks grow foul. Refactor all VQs together.
+  class IncrDictVQ < VectorQuantization
+    # MSE threshold; used as the default `eps` of `#train_one`
+    attr_reader :equal_simil
+    undef :ntrains # centroids are not trained
+    # Options `lrate`, `simil_type` and `ncentrs` are reported on stdout if given,
+    # then overridden (`ncentrs: 1`, the others `nil`) before calling the parent constructor.
+    # @param opts [Hash] parent options, plus `equal_simil` (defaults to 0.0)
+    def initialize **opts
+      puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate]
+      puts "Ignoring similarity: `simil_type: #{opts[:simil_type]}`" if opts[:simil_type]
+      puts "Ignoring ncentrs: `ncentrs: #{opts[:ncentrs]}`" if opts[:ncentrs]
+      # TODO: try different epsilons to reduce the number of states
+      # for example, in qbert we care what is lit and what is not, not the colors
+      # `equal_simil` is removed from `opts` so it is not forwarded to the parent
+      @equal_simil = opts.delete(:equal_simil) || 0.0
+      super **opts.merge({ncentrs: 1, lrate: nil, simil_type: nil})
+      @ntrains = nil # disables the per-centroid train counting (the parent's train loop guards on `@ntrains`)
+    end
+    # Overrides the parent VQ's lrate check with a no-op (lrate is always passed as `nil` here)
+    def check_lrate lrate; nil; end
+    # Train on one vector:
+    # - compute the mean squared error between `vec` and every centroid
+    # - skip if the closest centroid's MSE is below `eps` (image considered already in dictionary)
+    # - otherwise create a new centroid from the image
+    # NOTE(review): with the default `eps` of 0.0 the strict `<` below can never hold for a
+    #   non-negative MSE, so an exact duplicate would still be added -- confirm this is intended.
+    # @param vec the vector to add; presumably same type and size as the centroids -- TODO confirm
+    # @param eps [Numeric] MSE threshold under which `vec` is considered already known
+    # @return [Integer, nil] value of `@ncentrs` before the increment (index of the new centroid),
+    #   or `nil` if training was skipped
+    def train_one vec, eps: equal_simil
+      mses = centrs.map do |centr|
+        ((centr-vec)**2).sum / centr.size # uhm get rid of division maybe? squares?
+      end
+      min_mse = mses.min
+      # skip training if the centr with smallest mse (most similar) has less than eps error (equal)
+      # TODO: maintain an average somewhere, make eps dynamic
+      return if min_mse < eps
+      puts "Creating centr #{ncentrs} (min_mse: #{min_mse})"
+      centrs << vec
+      # append a 0 to `@utility`, presumably the utility slot of the new centroid
+      @utility = @utility.concatenate 0
+      # `tap` returns the receiver, i.e. the count before the increment
+      @ncentrs.tap{ @ncentrs += 1}
+    end
+  end
+end

data/lib/machine_learning_workbench/compressor/vector_quantization.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Compressor
   # Standard Vector Quantization
@@ -15,7 +17,7 @@ module MachineLearningWorkbench::Compressor
       check_lrate lrate # hack: so that we can overload it in dlr_vq
       @lrate = lrate
       @simil_type = simil_type || :dot
-      @encoding_type = encoding_type || :ensemble_norm
+      @encoding_type = encoding_type || :norm_ensemble
       @init_centr_vrange ||= vrange
       @vrange = case vrange
         when Array
@@ -80,6 +82,12 @@ module MachineLearningWorkbench::Compressor
         @ncodes += 1
         @utility[code] += 1
         code
+      when :most_similar_ary
+        code = simils.new_zeros
+        code[simils.max_index] = 1
+        @ncodes += 1
+        @utility += code
+        code
       when :ensemble
         code = simils
         tot = simils.sum
@@ -88,10 +96,17 @@ module MachineLearningWorkbench::Compressor
         @ncodes += 1
         @utility += (contrib - utility) / ncodes # cumulative moving average
         code
-      when :ensemble_norm
+      when :norm_ensemble
         tot = simils.sum
         tot = 1 if tot < 1e-5  # HACK: avoid division by zero
         code = simils / tot
+        @ncodes += 1
+        @utility += (code - utility) / ncodes # cumulative moving average
+        code
+      when :sparse_coding
+        raise NotImplementedError, "do this next"
         @ncodes += 1
         @utility += (code - utility) / ncodes # cumulative moving average
         code
@@ -104,11 +119,15 @@ module MachineLearningWorkbench::Compressor
       case type
       when :most_similar
         centrs[code]
+      when :most_similar_ary
+        centrs[code.eq(1).where[0]]
       when :ensemble
         tot = code.reduce :+
         centrs.zip(code).map { |centr, contr| centr*contr/tot }.reduce :+
-      when :ensemble_norm
+      when :norm_ensemble
         centrs.zip(code).map { |centr, contr| centr*contr }.reduce :+
+      when :sparse_coding
+        raise NotImplementedError, "do this next"
       else raise ArgumentError, "unrecognized reconstruction type: #{type}"
       end
     end
@@ -148,7 +167,7 @@ module MachineLearningWorkbench::Compressor
       vec_lst.each_with_index do |vec, i|
         trained_idx = train_one vec
         print '.' if debug
-        @ntrains[trained_idx] += 1
+        @ntrains[trained_idx] += 1 if @ntrains
       end
     end
   end

data/lib/machine_learning_workbench/monkey.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 # Monkey patches

data/lib/machine_learning_workbench/neural_network.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'neural_network/base'
 require_relative 'neural_network/feed_forward'
 require_relative 'neural_network/recurrent'

data/lib/machine_learning_workbench/neural_network/base.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::NeuralNetwork
   # Neural Network base class
@@ -26,10 +27,10 @@ module MachineLearningWorkbench::NeuralNetwork
     # @param struct [Array<Integer>] list of layer sizes
     # @param act_fn [Symbol] choice of activation function for the neurons
-    def initialize struct, act_fn: nil
+    def initialize struct, act_fn: nil, **act_fn_args
       @struct = struct
       @act_fn_name = act_fn || :sigmoid
-      @act_fn = send(act_fn_name)
+      @act_fn = send act_fn_name, **act_fn_args
       # @state holds both inputs, possibly recurrency, and bias
       # it is a complete input for the next layer, hence size from layer sizes
       @state = layer_row_sizes.collect do |size|
@@ -163,10 +164,10 @@ module MachineLearningWorkbench::NeuralNetwork
     ## Activation functions
     # Traditional sigmoid (logistic) with variable steepness
-    def sigmoid k=1
-      # k is steepness:  0<k<1 is flatter, 1<k is flatter
+    def sigmoid steepness: 1
+      # steepness:  0<s<1 is flatter, 1<s is steeper
       # flatter makes activation less sensitive, better with large number of inputs
-      -> (vec) { 1.0 / (NMath.exp(-k * vec) + 1.0) }
+      -> (vec) { 1.0 / (NMath.exp(-steepness * vec) + 1.0) }
     end
     alias logistic sigmoid

data/lib/machine_learning_workbench/neural_network/feed_forward.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::NeuralNetwork
   # Feed Forward Neural Network

data/lib/machine_learning_workbench/neural_network/recurrent.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::NeuralNetwork
   # Recurrent Neural Network

data/lib/machine_learning_workbench/optimizer.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer
 end

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Natural Evolution Strategies base class
@@ -19,11 +20,15 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     #   a single instance.
     # @param rescale_popsize [Float] scaling for the default population size
     # @param rescale_lrate [Float] scaling for the default learning rate
-    def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false, rescale_popsize: 1, rescale_lrate: 1
-      raise ArgumentError unless [:min, :max].include? opt_type
-      raise ArgumentError unless obj_fn.respond_to? :call
+    def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false, rescale_popsize: 1, rescale_lrate: 1, utilities: nil, popsize: nil, lrate: nil
+      raise ArgumentError, "opt_type: #{opt_type}" unless [:min, :max].include? opt_type
+      raise ArgumentError, "obj_fn not callable: #{obj_fn}" unless obj_fn.respond_to? :call
+      raise ArgumentError, "utilities only if popsize" if utilities && popsize.nil?
+      raise ArgumentError, "wrong sizes" if utilities && utilities.size != popsize
+      raise ArgumentError, "minimum popsize 5 for default utilities" if popsize&.<(5) && utilities.nil?
       @ndims, @opt_type, @obj_fn, @parallel_fit = ndims, opt_type, obj_fn, parallel_fit
-      @rescale_popsize, @rescale_lrate = rescale_popsize, rescale_lrate
+      @rescale_popsize, @rescale_lrate = rescale_popsize, rescale_lrate # rescale defaults
+      @utilities, @popsize, @lrate = utilities, popsize, lrate # if not set, defaults below
       @eye = NArray.eye(ndims)
       rseed ||= Random.new_seed
       # puts "NES rseed: #{s}"  # currently disabled
@@ -44,18 +49,20 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     end
     # Memoized automatic magic numbers
+    # Initialization options allow rescaling or entirely overriding these.
     # NOTE: Doubling popsize and halving lrate often helps
     def utils;   @utilities ||= cmaes_utilities end
     # (see #utils)
-    def popsize; @popsize   ||= cmaes_popsize * rescale_popsize end
+    def popsize; @popsize   ||= Integer(cmaes_popsize * rescale_popsize) end
     # (see #utils)
     def lrate;   @lrate     ||= cmaes_lrate * rescale_lrate end
-    # Magic numbers from CMA-ES (TODO: add proper citation)
+    # Magic numbers from CMA-ES (see `README` for citation)
     # @return [NArray] scale-invariant utilities
     def cmaes_utilities
       # Algorithm equations are meant for fitness maximization
       # Match utilities with individuals sorted by INCREASING fitness
+      raise ArgumentError, "Minimum `popsize` should be 5 (is #{popsize})" if popsize < 5
       log_range = (1..popsize).collect do |v|
         [0, Math.log(popsize.to_f/2 - 1) - Math.log(v)].max
       end

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb CHANGED

@@ -1,11 +1,13 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Block-Diagonal Natural Evolution Strategies
   class BDNES < Base
-    MAX_RSEED = 10**Random.new_seed.size # same range as Random.new_seed
+    MAX_RSEED = 10**Random.new_seed.size # block random seeds to be on the same range as `Random.new_seed`
-    attr_reader :ndims_lst, :blocks, :popsize
+    attr_reader :ndims_lst, :blocks, :popsize, :parallel_update
+    undef :ndims # only `ndims_lst` here
     # Initialize a list of XNES, one for each block
     # see class `Base` for the description of the rest of the arguments.
@@ -13,7 +15,8 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     #    matrix. Note: entire (reconstructed) individuals will be passed to the `obj_fn`
     #    regardless of the division here described.
     # @param init_opts [Hash] the rest of the options will be passed directly to XNES
-    def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, **init_opts
+    # @param parallel_update [Boolean] whether to parallelize block updates
+    def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, parallel_update: false, **init_opts
       # mu_init: 0, sigma_init: 1
       # init_opts = {rseed: rseed, mu_init: mu_init, sigma_init: sigma_init}
       # TODO: accept list of `mu_init`s and `sigma_init`s
@@ -21,9 +24,8 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
       block_fit = -> (*args) { raise "Should never be called" }
       # the BD-NES seed should ensure deterministic reproducibility
       # but each block should have a different seed
-      rseed ||= Random.new_seed
       # puts "BD-NES rseed: #{s}"  # currently disabled
-      @rng = Random.new rseed
+      @rng = Random.new rseed || Random.new_seed
       @blocks = ndims_lst.map do |ndims|
         b_rseed = rng.rand MAX_RSEED
         XNES.new ndims, block_fit, opt_type, rseed: b_rseed, **init_opts
@@ -34,6 +36,8 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
       @best = [(opt_type==:max ? -1 : 1) * Float::INFINITY, nil]
       @last_fits = []
+      @parallel_update = parallel_update
+      require 'parallel' if parallel_update
     end
     def sorted_inds_lst
@@ -82,9 +86,22 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     # duck-type the interface: [:train, :mu, :convergence, :save, :load]
+    # TODO: refactor DRY
     def train picks: sorted_inds_lst
-      blocks.zip(sorted_inds_lst).each do |xnes, s_inds|
-        xnes.train picks: s_inds
+      if parallel_update
+        # Parallel.each(blocks.zip(picks)) do |xnes, s_inds|
+        #   xnes.train picks: s_inds
+        # end
+        # Actually it's not this simple.
+        # Forks do not act on the parent, so I need to send back updated mu and sigma
+        # Luckily we have `NES#save` and `NES#load` at the ready
+        # Next: need to implement `#marshal_dump` and `#marshal_load` in `Base`
+        # Actually using `Cumo` rather than `Parallel` may avoid marshaling altogether
+        raise NotImplementedError, "Should dump and load each instance"
+      else
+        blocks.zip(picks).each do |xnes, s_inds|
+          xnes.train picks: s_inds
+        end
       end
     end
@@ -92,6 +109,10 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
       blocks.map(&:mu).reduce { |mem, var| mem.concatenate var, axis: 1 }
     end
+    def sigma
+      raise NotImplementedError, "need to write a concatenation like for mu here"
+    end
     def convergence
       blocks.map(&:convergence).reduce(:+)
     end

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/fnes.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Fixed Variance Natural Evolution Strategies

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/rnes.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Radial Natural Evolution Strategies

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Separable Natural Evolution Strategies

data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Exponential Natural Evolution Strategies
@@ -11,17 +12,23 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
           NArray[mu_init]
         when Numeric
           NArray.new([1,ndims]).fill mu_init
+        when NArray
+          raise ArgumentError unless mu_init.size == ndims
+          mu_init.ndim < 2 ? mu_init.reshape(1, ndims) : mu_init
         else
           raise ArgumentError, "Something is wrong with mu_init: #{mu_init}"
       end
       @sigma = case sigma_init
-      when Array
-        raise ArgumentError unless sigma_init.size == ndims
-        NArray[*sigma_init].diag
-      when Numeric
-        NArray.new([ndims]).fill(sigma_init).diag
-      else
-        raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
+        when Array
+          raise ArgumentError unless sigma_init.size == ndims
+          NArray[*sigma_init].diag
+        when Numeric
+          NArray.new([ndims]).fill(sigma_init).diag
+        when NArray
+          raise ArgumentError unless sigma_init.size == ndims**2
+          sigma_init.ndim < 2 ? sigma_init.reshape(ndims, ndims) : sigma_init
+        else
+          raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
       end
       # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi)
       @log_sigma = NMath.log(sigma.diagonal).diag

data/lib/machine_learning_workbench/systems.rb CHANGED

@@ -1 +1,3 @@
+# frozen_string_literal: true
 require_relative 'systems/neuroevolution'

data/lib/machine_learning_workbench/systems/neuroevolution.rb CHANGED

@@ -1,2 +1,3 @@
+# frozen_string_literal: true
 "Work in progress"

data/lib/machine_learning_workbench/tools.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'tools/execution'
 require_relative 'tools/normalization'
 require_relative 'tools/imaging'

data/lib/machine_learning_workbench/tools/execution.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Tools
   module Execution
     $fork_pids ||= []

data/lib/machine_learning_workbench/tools/imaging.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Tools
   module Imaging
     Forkable = MachineLearningWorkbench::Tools::Execution

data/lib/machine_learning_workbench/tools/normalization.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Tools
   module Normalization
     def self.feature_scaling narr, from: nil, to: [0,1]

data/lib/machine_learning_workbench/tools/verification.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 module MachineLearningWorkbench::Tools
   module Verification
     # TODO: switch to NArray

data/machine_learning_workbench.gemspec CHANGED

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 lib = File.expand_path("../lib", __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: machine_learning_workbench
 version: !ruby/object:Gem::Version
-  version: 0.6.1
+  version: 0.7.0
 platform: ruby
 authors:
 - Giuseppe Cuccu
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-04-12 00:00:00.000000000 Z
+date: 2018-04-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -204,6 +204,7 @@ files:
 - lib/machine_learning_workbench/compressor.rb
 - lib/machine_learning_workbench/compressor/copy_vq.rb
 - lib/machine_learning_workbench/compressor/decaying_learning_rate_vq.rb
+- lib/machine_learning_workbench/compressor/incr_dict_vq.rb
 - lib/machine_learning_workbench/compressor/vector_quantization.rb
 - lib/machine_learning_workbench/monkey.rb
 - lib/machine_learning_workbench/neural_network.rb