machine_learning_workbench 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 91771ca8a34d46668f837682cabd7ed97c4a70f3
-  data.tar.gz: 85bdd208575e094571b8d5528419d10a656d1e97
+  metadata.gz: 07202eab431a1fcddeb2f8c9cc669a3f63fa762d
+  data.tar.gz: 64c17b81b9a8476e664a40fb64c1faec3e8fb807
 SHA512:
-  metadata.gz: 10721ff0e089d0924af4c71f1d0febb04d84dacdc6a20351e4c46d893453274c53fd6cd0435331faadeda97b618fd1a43a4cc6aee43dc938484f2540512a86a7
-  data.tar.gz: 2c2c03d769105cc179960456ae81eedaeb0c89cf246fc89c6a150468c525f94cce36d4b47529466097ce91d0613b4a78f9ee51e4558a65106c5b8cc472ec21b9
+  metadata.gz: 24acec6a1948299718e10b93a8d22e983212af0bb255deb78e3e60ab7d118316b2c6561d59c23508098fe67b14d1edb6e539ab4b1e28d5e0fd44e85618ecc43d
+  data.tar.gz: 4123e04bb2eb291fb71accfe9041cab5dd474b9a302d3e26a589dd34f96491d3cddeedbde50024bb7deca62cedbd9b5161d98b43dc2e016ab8bcf8ea03eb9316
.travis.yml CHANGED
@@ -2,4 +2,8 @@ sudo: false
 language: ruby
 rvm:
 - 2.4.2
-before_install: gem install bundler -v 1.16.0
+addons:
+  apt:
+    packages:
+    - libatlas-base-dev
+before_install: gem install bundler -v 1.16.0
data/README.md CHANGED
@@ -1,4 +1,8 @@
-# Machine Learning Workbench
+# [Machine Learning Workbench](https://github.com/giuse/machine_learning_workbench)
+
+[![Gem Version](https://badge.fury.io/rb/machine_learning_workbench.svg)](https://badge.fury.io/rb/machine_learning_workbench)
+[![Build Status](https://travis-ci.org/giuse/machine_learning_workbench.svg?branch=master)](https://travis-ci.org/giuse/machine_learning_workbench)
+[![Code Climate](https://codeclimate.com/github/giuse/machine_learning_workbench/badges/gpa.svg)](https://codeclimate.com/github/giuse/machine_learning_workbench)
 
 This workbench holds a collection of machine learning methods in Ruby. Rather than specializing on a single task or method, this gem aims at providing an encompassing framework for any machine learning application.
 
@@ -20,14 +24,23 @@ Or install it yourself as:
 
 ## Usage
 
-TODO: Write usage instructions here
+TLDR: Check out [the `examples` directory](examples), e.g. [this script](examples/neuroevolution.rb).
+
+This library is meant as a practical workbench: there are plenty of tools hanging, each with multiple uses and applications, and as such it is built to be as atomic and flexible as possible. Folders [in the lib structure](lib/machine_learning_workbench) categorize them.
+
+The [systems directory](lib/machine_learning_workbench/systems) holds a few examples of how to bring them together in higher abstractions, i.e. as _compound tools_.
+For example, a [neuroevolution setup](lib/machine_learning_workbench/systems/neuroevolution.rb) brings together evolutionary computation and neural networks.
+
+For an example of how to build it from scratch, check this [neuroevolution script](examples/neuroevolution.rb). To run it, use `bundle exec ruby examples/neuroevolution.rb`.
+
 
 ## Development
 
-After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+After cloning the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
 
 To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
 
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/machine_learning_workbench.
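
To make the new Usage section concrete, here is a condensed sketch of the workflow the README points to. It uses only names that appear in `examples/neuroevolution.rb` below (`FeedForward`, `XNES`, `nweights`, `train`, `best`); treat it as an illustration of that example script, not as additional API:

```ruby
require 'machine_learning_workbench'
WB = MachineLearningWorkbench

# Tiny network + trivial fitness, just to show the moving parts
net = WB::NeuralNetwork::FeedForward.new [2, 2, 1], act_fn: :logistic
fitness = -> (weights) do
  net.load_weights weights   # the candidate solution becomes the net's weights
  net.activate([1, 0]).first # score it by the single output neuron's activation
end
nes = WB::Optimizer::NaturalEvolutionStrategies::XNES.new net.nweights, fitness, :max
10.times { nes.train }            # a few epochs suffice for a toy setup
best_fit, best_weights = nes.best # best individual found so far
```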
data/bin/console CHANGED
@@ -1,14 +1,14 @@
 #!/usr/bin/env ruby
 
-require "bundler/setup"
-require "machine_learning_workbench"
+require 'bundler/setup'
+require 'machine_learning_workbench'
 
 # You can add fixtures and/or initialization code here to make experimenting
 # with your gem easier. You can also use a different console, if you like.
 
-# (If you use this, don't forget to add pry to your Gemfile!)
-require "pry"
+require 'pry'
 Pry.start
 
+# alternatively:
 # require "irb"
 # IRB.start(__FILE__)
data/bin/setup CHANGED
@@ -3,6 +3,9 @@ set -euo pipefail
 IFS=$'\n\t'
 set -vx
 
+# ubuntu:
+sudo apt install libatlas-base-dev # for nmatrix
+
 bundle install
 
 # Do any other automated setup that you need to do here
data/examples/neuroevolution.rb ADDED
@@ -0,0 +1,76 @@
+# Make sure the gem is installed first with `gem install machine_learning_workbench`
+# Alternatively, add `gem 'machine_learning_workbench'` to your Gemfile if using Bundler,
+# followed by a `bundle install`
+require 'machine_learning_workbench'
+# Workbench shorthands
+WB = MachineLearningWorkbench
+XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
+FFNN = WB::NeuralNetwork::FeedForward
+
+# Let's address the XOR problem, as it requires nonlinear fitting
+XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
+# A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
+# For other uses, make sure you match the first number to the number of inputs, and
+# the last one to the number of outputs; then add as many layers as needed, by
+# specifying the size of each. Here we have only one, of size 2.
+# NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
+# for speed. In the latter case, use the function `nweights_per_layer` when instantiating
+# BDNES rather than `nweights`.
+NET = FFNN.new [2,2,1], act_fn: :logistic
+# Note: the process is exactly the same, from instantiation to training, for recurrent
+# networks using the class `WB::NeuralNetwork::Recursive`.
+# Of course RNNs should be applied to sequential tasks, while XOR is static
+
+# We will search for the network's weights with a black-box optimization algorithm
+# This means we will search for arrays of numbers, which need to be scored.
+# The scoring process will work as follows: use the numbers as weights for the neural
+# network, test the network on classifying the 4 cases of XOR, and use the count of
+# correct classifications as the score for the weights (the original array of numbers).
+
+# Hence the fitness looks as follows:
+def fitness weights
+  # Each list of weights uniquely defines a neural network
+  NET.load_weights weights
+  # Activate the network on each of the XOR instances
+  # - prediction: the output of the network
+  # - observation: correct value, our target
+  pred_obs = XOR.map do |input, obs|
+    # The network can have an arbitrary number of output neurons
+    # Since here we have only one, we extract the value calling `#first`
+    output = NET.activate(input).first
+    # Here we interpret the output as classification
+    pred = output > 0.5 ? 1 : 0
+    # Finally accumulate prediction-observation pairs
+    [pred, obs]
+  end
+  # To build a score out of this, we count the number of correct classifications
+  score = Float(pred_obs.count { |pr, ob| pr == ob })
+  # That's it, this will score the weights based on their network's performance
+end
+
+# Next comes initializing the black-box stochastic optimization algorithm
+# We are searching for the network's weights, this gives us the search space dimensionality
+# We'll use XNES as we are working with less than 100 dimensions (weights)
+nes = XNES.new NET.nweights, method(:fitness), :max, rseed: 15
+# Note: the random seed is fixed here to ensure the task is solved on the first try, in a few iterations
+# In a real task, it is best to use a larger network, run more iterations, and try several seeds
+
+# NOTE: In practical applications it is best to delegate parallelization to the fitness
+# function instead of computing the fitness of one individual at a time. This can be
+# achieved by passing an objective function defined on a _list_ of weight-lists, and
+# setting the `parallel_fit` switch to `true`:
+# nes = XNES.new NET.nweights,
+#   -> (genotypes) { Parallel.map genotypes, &method(:fitness) },
+#   :max, rseed: 15, parallel_fit: true
+
+
+# Nothing left but to run the optimization algorithm; a few epochs will suffice here
+50.times { nes.train }
+# OK! now remember, `NET` currently holds the weights of the last evaluation
+# Let's fetch the best individual found so far
+best_fit, best_weights = nes.best
+# Let's run them again to check they work
+result = fitness best_weights # careful here if you defined a parallel `fitness`
+puts "The found network achieves a score of #{result} out of 4 in the XOR task"
+puts "Weights: #{best_weights}"
+puts "Done!"
data/lib/machine_learning_workbench.rb CHANGED
@@ -7,7 +7,6 @@ module MachineLearningWorkbench
   end
   module Optimizer
   end
-  end
   module Tools
   end
 end
data/lib/machine_learning_workbench/neural_network/base.rb CHANGED
@@ -123,7 +123,7 @@ module MachineLearningWorkbench::NeuralNetwork
     # @return [true] always true. If something's wrong it simply fails, and if
     #   all goes well there's nothing to return but a confirmation to the caller.
     def load_weights weights
-      raise "Hell!" unless weights.size == nweights
+      raise ArgumentError unless weights.size == nweights
       weights_iter = weights.each
       @layers = layer_shapes.collect do |shape|
         NMatrix.new(shape, dtype: :float64) { weights_iter.next }
@@ -144,8 +144,8 @@ module MachineLearningWorkbench::NeuralNetwork
     # @param input [Array<Float>] the given input
     # @return [Array] the activation of the output layer
     def activate input
-      raise "Hell!" unless input.size == struct.first
-      raise "Hell!" unless input.is_a? Array
+      raise ArgumentError unless input.size == struct.first
+      raise ArgumentError unless input.is_a? Array
       # load input in first state
       @state[0][0, 0..-2] = input
       # activate layers in sequence
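
As a quick sketch of what the switch to `ArgumentError` means for callers (reusing the `FFNN` shorthand from the example script; the mis-sized input is deliberately wrong):

```ruby
require 'machine_learning_workbench'
FFNN = MachineLearningWorkbench::NeuralNetwork::FeedForward

net = FFNN.new [2, 2, 1], act_fn: :logistic
net.load_weights Array.new(net.nweights, 0.1) # OK: size matches `nweights`
net.activate [0, 1]                           # OK: two inputs for a [2,2,1] net
begin
  net.activate [0, 1, 1]                      # wrong size: was `raise "Hell!"`,
rescue ArgumentError                          # now a standard ArgumentError
  puts 'activate rejected a mis-sized input'
end
```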
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb CHANGED
@@ -2,17 +2,24 @@
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Natural Evolution Strategies base class
   class Base
-    attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :id, :rng, :last_fits, :best
+    attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :parallel_fit, :id, :rng, :last_fits, :best
 
     # NES object initialization
     # @param ndims [Integer] number of parameters to optimize
     # @param obj_fn [#call] any object defining a #call method (Proc, lambda, custom class)
     # @param opt_type [:min, :max] select minimization / maximization of obj_fn
     # @param rseed [Integer] allow for deterministic execution on rseed provided
-    def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1
+    # @param mu_init [Numeric] values to initialize the distribution's mean
+    # @param sigma_init [Numeric] values to initialize the distribution's covariance
+    # @param parallel_fit [Boolean] whether the `obj_fn` should be passed all the individuals
+    #   together. In the canonical case the fitness function always scores a single individual;
+    #   in practical cases though it is easier to delegate the scoring parallelization to the
+    #   external fitness function. Turning this to `true` will make the algorithm pass _an
+    #   Array_ of individuals to the fitness function, rather than a single instance.
+    def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false
       raise ArgumentError unless [:min, :max].include? opt_type
       raise ArgumentError unless obj_fn.respond_to? :call
-      @ndims, @opt_type, @obj_fn = ndims, opt_type, obj_fn
+      @ndims, @opt_type, @obj_fn, @parallel_fit = ndims, opt_type, obj_fn, parallel_fit
       @id = NMatrix.identity(ndims, dtype: :float64)
      rseed ||= Random.new_seed
      # puts "NES rseed: #{s}" # currently disabled
@@ -89,7 +96,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     def sorted_inds
       samples = standard_normal_samples
       inds = move_inds(samples).to_a
-      fits = obj_fn.call(inds)
+      fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
       # Quick cure for NaN fitnesses
       fits.map! { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x }
       @last_fits = fits # allows checking for stagnation
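
The `parallel_fit` switch changes the fitness function's contract. A sketch of both conventions (the `Parallel` gem mirrors the suggestion in the example script's NOTE and is an extra dependency, not part of this gem):

```ruby
require 'machine_learning_workbench'
require 'parallel' # assumption: only needed for the parallel variant
XNES = MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies::XNES

sphere = -> (ind) { -ind.map { |x| x**2 }.reduce(:+) } # toy fitness: maximize -sum(x^2)

# Default (serial): obj_fn is called once per individual
serial = XNES.new 5, sphere, :max

# parallel_fit: true -> obj_fn receives the whole population as an Array
parallel = XNES.new 5,
  -> (inds) { Parallel.map(inds, &sphere) },
  :max, parallel_fit: true
```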
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb CHANGED
@@ -5,15 +5,20 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
 
     MAX_RSEED = 10**Random.new_seed.size # same range as Random.new_seed
 
-    attr_reader :ndims_lst, :obj_fn, :opt_type, :blocks, :popsize, :rng,
+    attr_reader :ndims_lst, :obj_fn, :opt_type, :parallel_fit, :blocks, :popsize, :rng,
       :best, :last_fits
 
-    # initialize a list of XNES for each block
-    def initialize ndims_lst, obj_fn, opt_type, rseed: nil, **init_opts
+    # Initialize a list of XNES, one for each block.
+    # See class `Base` for the description of the rest of the arguments.
+    # @param ndims_lst [Array<Integer>] list of sizes for each block in the block-diagonal
+    #   matrix. Note: entire (reconstructed) individuals will be passed to the `obj_fn`,
+    #   regardless of the division described here.
+    # @param init_opts [Hash] the rest of the options will be passed directly to XNES
+    def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, **init_opts
       # mu_init: 0, sigma_init: 1
       # init_opts = {rseed: rseed, mu_init: mu_init, sigma_init: sigma_init}
       # TODO: accept list of `mu_init`s and `sigma_init`s
-      @ndims_lst, @obj_fn, @opt_type = ndims_lst, obj_fn, opt_type
+      @ndims_lst, @obj_fn, @opt_type, @parallel_fit = ndims_lst, obj_fn, opt_type, parallel_fit
       block_fit = -> (*args) { raise "Should never be called" }
       # the BD-NES seed should ensure deterministic reproducibility
       # but each block should have a different seed
@@ -48,7 +53,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
       full_samples = samples_lst.transpose
 
       # Evaluate fitness of complete individuals
-      fits = obj_fn.call(full_inds)
+      fits = parallel_fit ? obj_fn.call(full_inds) : full_inds.map(&obj_fn)
       # Quick cure for NaN fitnesses
       fits.map! { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x }
       @last_fits = fits # allows checking for stagnation
@@ -91,7 +96,6 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     end
 
     def load data
-      # raise "Hell!" unless data.size == 2
       fit = -> (*args) { raise "Should never be called" }
       @blocks = data.map do |block_data|
         ndims = block_data.first.size
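
And a sketch of BDNES instantiation following the example script's NOTE (one block per layer via `nweights_per_layer`; the toy fitness is hypothetical):

```ruby
require 'machine_learning_workbench'
WB = MachineLearningWorkbench

net = WB::NeuralNetwork::FeedForward.new [2, 2, 1], act_fn: :logistic
fitness = -> (weights) { net.load_weights weights; net.activate([0, 1]).first }

# One XNES block per layer; full (reconstructed) individuals reach `fitness`
nes = WB::Optimizer::NaturalEvolutionStrategies::BDNES.new(
  net.nweights_per_layer, fitness, :max)
5.times { nes.train }
```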
data/lib/machine_learning_workbench/systems.rb ADDED
@@ -0,0 +1 @@
+require_relative 'systems/neuroevolution'
data/lib/machine_learning_workbench/systems/neuroevolution.rb ADDED
@@ -0,0 +1,2 @@
+
+"Work in progress"
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: machine_learning_workbench
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Giuseppe Cuccu
@@ -169,6 +169,7 @@ files:
 - Rakefile
 - bin/console
 - bin/setup
+- examples/neuroevolution.rb
 - lib/machine_learning_workbench.rb
 - lib/machine_learning_workbench/compressor.rb
 - lib/machine_learning_workbench/compressor/vector_quantization.rb
@@ -182,6 +183,8 @@ files:
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb
+- lib/machine_learning_workbench/systems.rb
+- lib/machine_learning_workbench/systems/neuroevolution.rb
 - lib/machine_learning_workbench/tools.rb
 - lib/machine_learning_workbench/tools/execution.rb
 - lib/machine_learning_workbench/tools/imaging.rb