machine_learning_workbench 0.1.1 → 0.1.2
- checksums.yaml +4 -4
- data/.travis.yml +5 -1
- data/README.md +16 -3
- data/bin/console +4 -4
- data/bin/setup +3 -0
- data/examples/neuroevolution.rb +76 -0
- data/lib/machine_learning_workbench.rb +0 -1
- data/lib/machine_learning_workbench/neural_network/base.rb +3 -3
- data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +11 -4
- data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb +10 -6
- data/lib/machine_learning_workbench/systems.rb +1 -0
- data/lib/machine_learning_workbench/systems/neuroevolution.rb +2 -0
- metadata +4 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 07202eab431a1fcddeb2f8c9cc669a3f63fa762d
+  data.tar.gz: 64c17b81b9a8476e664a40fb64c1faec3e8fb807
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 24acec6a1948299718e10b93a8d22e983212af0bb255deb78e3e60ab7d118316b2c6561d59c23508098fe67b14d1edb6e539ab4b1e28d5e0fd44e85618ecc43d
+  data.tar.gz: 4123e04bb2eb291fb71accfe9041cab5dd474b9a302d3e26a589dd34f96491d3cddeedbde50024bb7deca62cedbd9b5161d98b43dc2e016ab8bcf8ea03eb9316
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,8 @@
-# Machine Learning Workbench
+# [Machine Learning Workbench](https://github.com/giuse/machine_learning_workbench)
+
+[![Gem Version](https://badge.fury.io/rb/machine_learning_workbench.svg)](https://badge.fury.io/rb/machine_learning_workbench)
+[![Build Status](https://travis-ci.org/giuse/machine_learning_workbench.svg?branch=master)](https://travis-ci.org/giuse/machine_learning_workbench)
+[![Code Climate](https://codeclimate.com/github/giuse/machine_learning_workbench/badges/gpa.svg)](https://codeclimate.com/github/giuse/machine_learning_workbench)
 
 This workbench holds a collection of machine learning methods in Ruby. Rather than specializing on a single task or method, this gem aims at providing an encompassing framework for any machine learning application.
 
@@ -20,14 +24,23 @@ Or install it yourself as:
 
 ## Usage
 
-
+TLDR: Check out [the `examples` directory](examples), e.g. [this script](examples/neuroevolution.rb).
+
+This library is meant as a practical workbench: there are plenty of tools hanging, each has multiple uses and applications, and as such it is built as atomic and flexible as possible. Folders [in the lib structure](lib/machine_learning_workbench) categorize them.
+
+The [systems directory](lib/machine_learning_workbench/systems) holds a few examples of how to bring them together in higher abstractions, i.e. as _compound tools_.
+For example, a [neuroevolution setup](lib/machine_learning_workbench/systems/neuroevolution.rb) brings together evolutionary computation and neural networks.
+
+For an example of how to build it from scratch, check this [neuroevolution script](examples/neuroevolution.rb). To run it, use `bundle exec ruby examples/neuroevolution.rb`.
+
 
 ## Development
 
-After
+After cloning the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
 
 To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
 
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/machine_learning_workbench.
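As a concrete companion to the pointers added to the Usage section above, here is a minimal end-to-end sketch. It uses only names that appear in the example script added later in this diff (`XNES`, `train`, `best`); the toy objective function is illustrative and not part of the gem.

```ruby
require 'machine_learning_workbench'

WB   = MachineLearningWorkbench
XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES

# Toy objective (not from the gem): push every weight toward 1.0
fit = -> (weights) { weights.map { |w| -(w - 1)**2 }.reduce(:+) }
nes = XNES.new 3, fit, :max, rseed: 0
20.times { nes.train }
best_fit, best_weights = nes.best
puts "Best fitness #{best_fit} at #{best_weights}"
```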
data/bin/console
CHANGED
@@ -1,14 +1,14 @@
 #!/usr/bin/env ruby
 
-require
-require
+require 'bundler/setup'
+require 'machine_learning_workbench'
 
 # You can add fixtures and/or initialization code here to make experimenting
 # with your gem easier. You can also use a different console, if you like.
 
-
-require "pry"
+require 'pry'
 Pry.start
 
+# alternatively:
 # require "irb"
 # IRB.start(__FILE__)
data/bin/setup
CHANGED
data/examples/neuroevolution.rb
CHANGED
@@ -0,0 +1,76 @@
+# Make sure the gem is installed first with `gem install machine_learning_workbench`
+# Alternatively, add `gem 'machine_learning_workbench'` to your Gemfile if using Bundler,
+# followed by a `bundle install`
+require 'machine_learning_workbench'
+# Workbench shorthands
+WB = MachineLearningWorkbench
+XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
+FFNN = WB::NeuralNetwork::FeedForward
+
+# Let's address the XOR problem, as it requires nonlinear fitting
+XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
+# A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
+# For other uses, make sure you match the first number to the number of inputs, and
+# the last one to the number of outputs; then add as many layers as needed, by
+# specifying the size of each. Here we have only one, of size 2.
+# NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
+# for speed. In the second case, use the function `nweights_per_layer` when instantiating
+# BDNES rather than `nweights`.
+NET = FFNN.new [2,2,1], act_fn: :logistic
+# Note: the process is exactly the same, from instantiation to training, for recurrent
+# networks using the class `WB::NeuralNetwork::Recursive`.
+# Of course RNNs should be applied to sequential tasks, while XOR is static
+
+# We will search for the network's weights with a black-box optimization algorithm
+# This means we will search for arrays of numbers, which need to be scored.
+# The scoring process will work as follows: use the numbers as weights for the neural
+# network, test the network on classifying the 4 cases of XOR, use that count as the
+# score for the weights (original array of numbers).
+
+# Hence the fitness looks as follows:
+def fitness weights
+  # Each list of weights uniquely defines a neural network
+  NET.load_weights weights
+  # Activate the network on each of the XOR instances
+  # - prediction: the output of the network
+  # - observation: correct value, our target
+  pred_obs = XOR.map do |input, obs|
+    # The network can have an arbitrary number of output neurons
+    # Since here we have only one, we extract the value calling `#first`
+    output = NET.activate(input).first
+    # Here we interpret the output as classification
+    pred = output > 0.5 ? 1 : 0
+    # Finally accumulate prediction-observation pairs
+    [pred, obs]
+  end
+  # To build a score out of this, we count the number of correct classifications
+  score = Float(pred_obs.count { |pr, ob| pr == ob })
+  # That's it, this will score the weights based on their network's performance
+end
+
+# Next comes initializing the black-box stochastic optimization algorithm
+# We are searching for the network's weights, this gives us the search space dimensionality
+# We'll use XNES as we are working with less than 100 dimensions (weights)
+nes = XNES.new NET.nweights, method(:fitness), :max, rseed: 15
+# Note: the random seed is fixed here to ensure the task is solved in one try in few iterations
+# In a real task, best using an over-large network, more iterations, and try several seeds
+
+# NOTE: In practical applications it is best to delegate parallelization to the fitness
+# function instead of computing the fitness of one individual at a time. This can be
+# achieved by passing an objective function defined on a _list_ of weight-lists, and
+# setting the `parallel_fit` switch to `true`:
+# nes = XNES.new NET.nweights,
+#   -> (genotypes) { Parallel.map genotypes, &method(:fitness) },
+#   :max, rseed: 15, parallel_fit: true
+
+
+# Nothing left but to run the optimization algorithm, few epochs here will suffice
+50.times { nes.train }
+# OK! now remember, `NET` currently holds the weights of the last evaluation
+# Let's fetch the best individual found so far
+best_fit, best_weights = nes.best
+# Let's run them again to check they work
+result = fitness best_weights # careful here if you defined a parallel `fitness`
+puts "The found network achieves a score of #{result} out of 4 in the XOR task"
+puts "Weights: #{best_weights}"
+puts "Done!"
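The parallel variant the script leaves commented out, written as a runnable sketch. Assumptions: the `parallel` gem (providing `Parallel.map`) is installed separately — it is not a dependency of this workbench — and `NET`, `XNES`, and `fitness` are as defined in the script above.

```ruby
require 'parallel'  # external gem, assumed installed; not a workbench dependency

# With `parallel_fit: true` the optimizer passes the whole population at once,
# so the objective maps a list of genotypes to a list of scores
nes = XNES.new NET.nweights,
  -> (genotypes) { Parallel.map genotypes, &method(:fitness) },
  :max, rseed: 15, parallel_fit: true
50.times { nes.train }
```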
data/lib/machine_learning_workbench/neural_network/base.rb
CHANGED
@@ -123,7 +123,7 @@ module MachineLearningWorkbench::NeuralNetwork
     # @return [true] always true. If something's wrong it simply fails, and if
     # all goes well there's nothing to return but a confirmation to the caller.
     def load_weights weights
-      raise
+      raise ArgumentError unless weights.size == nweights
       weights_iter = weights.each
       @layers = layer_shapes.collect do |shape|
         NMatrix.new(shape, dtype: :float64) { weights_iter.next }
@@ -144,8 +144,8 @@ module MachineLearningWorkbench::NeuralNetwork
     # @param input [Array<Float>] the given input
     # @return [Array] the activation of the output layer
     def activate input
-      raise
-      raise
+      raise ArgumentError unless input.size == struct.first
+      raise ArgumentError unless input.is_a? Array
       # load input in first state
       @state[0][0, 0..-2] = input
       # activate layers in sequence
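A sketch of how the new guards behave, reusing the `FFNN` shorthand from the example script (the weight values are arbitrary):

```ruby
net = FFNN.new [2,2,1], act_fn: :logistic
net.load_weights Array.new(net.nweights) { 0.1 }  # size matches nweights: OK
out = net.activate([0, 1]).first                  # 2 inputs match struct.first: OK

begin
  net.load_weights [0.1, 0.2]  # too few weights
rescue ArgumentError
  puts "rejected: weights.size must equal nweights"
end
```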
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb
CHANGED
@@ -2,17 +2,24 @@
 module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
   # Natural Evolution Strategies base class
   class Base
-    attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :id, :rng, :last_fits, :best
+    attr_reader :ndims, :mu, :sigma, :opt_type, :obj_fn, :parallel_fit, :id, :rng, :last_fits, :best
 
     # NES object initialization
     # @param ndims [Integer] number of parameters to optimize
     # @param obj_fn [#call] any object defining a #call method (Proc, lambda, custom class)
     # @param opt_type [:min, :max] select minimization / maximization of obj_fn
     # @param rseed [Integer] allow for deterministic execution on rseed provided
-
+    # @param mu_init [Numeric] values to initialize the distribution's mean
+    # @param sigma_init [Numeric] values to initialize the distribution's covariance
+    # @param parallel_fit [boolean] whether the `obj_fn` should be passed all the individuals
+    #   together. In the canonical case the fitness function always scores a single individual;
+    #   in practical cases though it is easier to delegate the scoring parallelization to the
+    #   external fitness function. Turning this to `true` will make the algorithm pass _an
+    #   Array_ of individuals to the fitness function, rather than a single instance.
+    def initialize ndims, obj_fn, opt_type, rseed: nil, mu_init: 0, sigma_init: 1, parallel_fit: false
       raise ArgumentError unless [:min, :max].include? opt_type
       raise ArgumentError unless obj_fn.respond_to? :call
-      @ndims, @opt_type, @obj_fn = ndims, opt_type, obj_fn
+      @ndims, @opt_type, @obj_fn, @parallel_fit = ndims, opt_type, obj_fn, parallel_fit
       @id = NMatrix.identity(ndims, dtype: :float64)
       rseed ||= Random.new_seed
       # puts "NES rseed: #{s}" # currently disabled
@@ -89,7 +96,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     def sorted_inds
       samples = standard_normal_samples
       inds = move_inds(samples).to_a
-      fits = obj_fn.call(inds)
+      fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
       # Quick cure for NaN fitnesses
       fits.map! { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x }
       @last_fits = fits # allows checking for stagnation
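The two dispatch modes of the rewritten fitness call, side by side (toy objectives for illustration only; they are not part of the gem, and `XNES` is the shorthand from the example script):

```ruby
# Serial (default): obj_fn scores one individual (an Array of Floats) at a time
serial = XNES.new 5, -> (ind) { ind.reduce(:+) }, :max

# Parallel: obj_fn receives the whole population and returns one score per individual
batch = XNES.new 5,
  -> (inds) { inds.map { |ind| ind.reduce(:+) } },
  :max, parallel_fit: true
```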
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb
CHANGED
@@ -5,15 +5,20 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
 
     MAX_RSEED = 10**Random.new_seed.size # same range as Random.new_seed
 
-    attr_reader :ndims_lst, :obj_fn, :opt_type, :blocks, :popsize, :rng,
+    attr_reader :ndims_lst, :obj_fn, :opt_type, :parallel_fit, :blocks, :popsize, :rng,
       :best, :last_fits
 
-    #
-
+    # Initialize a list of XNES, one for each block
+    # see class `Base` for the description of the rest of the arguments.
+    # @param ndims_lst [Array<Integer>] list of sizes for each block in the block-diagonal
+    #   matrix. Note: entire (reconstructed) individuals will be passed to the `obj_fn`
+    #   regardless of the division here described.
+    # @param init_opts [Hash] the rest of the options will be passed directly to XNES
+    def initialize ndims_lst, obj_fn, opt_type, parallel_fit: false, rseed: nil, **init_opts
       # mu_init: 0, sigma_init: 1
       # init_opts = {rseed: rseed, mu_init: mu_init, sigma_init: sigma_init}
       # TODO: accept list of `mu_init`s and `sigma_init`s
-      @ndims_lst, @obj_fn, @opt_type = ndims_lst, obj_fn, opt_type
+      @ndims_lst, @obj_fn, @opt_type, @parallel_fit = ndims_lst, obj_fn, opt_type, parallel_fit
       block_fit = -> (*args) { raise "Should never be called" }
       # the BD-NES seed should ensure deterministic reproducibility
       # but each block should have a different seed
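Putting the new signature to work, as a sketch. Assumptions: the class is named `BDNES` (as the file name suggests) and exposes the same `train` loop as the other NES classes; `WB`, `FFNN`, and `fitness` are reused from the example script; `nweights_per_layer` is the per-block helper the script's comments recommend over `nweights`.

```ruby
BDNES = WB::Optimizer::NaturalEvolutionStrategies::BDNES

net = FFNN.new [2,2,1], act_fn: :logistic
# One XNES block per weight matrix; the fitness function still receives
# complete (reconstructed) individuals, as documented above
bdnes = BDNES.new net.nweights_per_layer, method(:fitness), :max
10.times { bdnes.train }
```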
@@ -48,7 +53,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
       full_samples = samples_lst.transpose
 
       # Evaluate fitness of complete individuals
-      fits = obj_fn.call(full_inds)
+      fits = parallel_fit ? obj_fn.call(full_inds) : full_inds.map(&obj_fn)
       # Quick cure for NaN fitnesses
       fits.map! { |x| x.nan? ? (opt_type==:max ? -1 : 1) * Float::INFINITY : x }
       @last_fits = fits # allows checking for stagnation
@@ -91,7 +96,6 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     end
 
     def load data
-      # raise "Hell!" unless data.size == 2
       fit = -> (*args) { raise "Should never be called" }
       @blocks = data.map do |block_data|
         ndims = block_data.first.size
data/lib/machine_learning_workbench/systems.rb
CHANGED
@@ -0,0 +1 @@
+require_relative 'systems/neuroevolution'
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: machine_learning_workbench
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Giuseppe Cuccu
@@ -169,6 +169,7 @@ files:
 - Rakefile
 - bin/console
 - bin/setup
+- examples/neuroevolution.rb
 - lib/machine_learning_workbench.rb
 - lib/machine_learning_workbench/compressor.rb
 - lib/machine_learning_workbench/compressor/vector_quantization.rb
@@ -182,6 +183,8 @@ files:
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb
 - lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb
+- lib/machine_learning_workbench/systems.rb
+- lib/machine_learning_workbench/systems/neuroevolution.rb
 - lib/machine_learning_workbench/tools.rb
 - lib/machine_learning_workbench/tools/execution.rb
 - lib/machine_learning_workbench/tools/imaging.rb