machine_learning_workbench 0.4.5 → 0.5.0
- checksums.yaml +4 -4
- data/examples/neuroevolution.rb +1 -2
- data/lib/machine_learning_workbench.rb +6 -2
- data/lib/machine_learning_workbench/compressor/online_vector_quantization.rb +15 -7
- data/lib/machine_learning_workbench/compressor/vector_quantization.rb +34 -13
- data/lib/machine_learning_workbench/monkey.rb +6 -6
- data/lib/machine_learning_workbench/neural_network/base.rb +4 -4
- data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +3 -3
- data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/rnes.rb +1 -1
- data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb +1 -1
- data/lib/machine_learning_workbench/tools/imaging.rb +4 -1
- data/lib/machine_learning_workbench/tools/normalization.rb +4 -1
- metadata +2 -2
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: baafa2eb71e8bbcc83d6a320437c2a0ad8f5544e
+  data.tar.gz: 5337ad91449767ca2754b41b2784212bfe5fbae7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 276d67ea700d5d9a6c0ee450cf0839438c77d7b7ce8dce87173b13a0898d0615cf30cc1f803686cb2da9e76953e601f8e702948853b2332cdffd57f7250ed72f
+  data.tar.gz: 3b9e2fdee7f0435d4acd52b902de928705e021f2e1715b58a09128d69cf129e4aaa7ad8f33d0545a74761d0b2345aa8df6a3ac112b7ebb8c67002431e686ce90
data/examples/neuroevolution.rb
CHANGED

@@ -3,7 +3,6 @@
 # followed by a `bundle install`
 require 'machine_learning_workbench'
 # Workbench shorthands
-WB = MachineLearningWorkbench
 XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
 FFNN = WB::NeuralNetwork::FeedForward
 
@@ -11,7 +10,7 @@ FFNN = WB::NeuralNetwork::FeedForward
 XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
 # A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
 # For other uses, make sure you match the first number to the number of inputs, and
-# the last one as the number of outputs; then add as many layers as needed, by
+# the last one as the number of outputs; then add as many layers as needed, by
 # specifying the size of each. Here we have only one, of size 2.
 # NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
 # for speed. In the second case, use the function `nweights_per_layer` when instantiating
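The removed `WB` line pairs with the library change below: as of 0.5.0 the gem defines the `WB` shorthand itself, so examples no longer declare it. A minimal sketch of how the example now boots (constants taken from the diff above):

    require 'machine_learning_workbench'
    # `WB` is now provided by the gem itself (WB = MachineLearningWorkbench)
    XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
    FFNN = WB::NeuralNetwork::FeedForward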
data/lib/machine_learning_workbench.rb
CHANGED

@@ -2,17 +2,20 @@
 gpu = false # prepare for switching to GPUs
 if gpu
   require 'cumo/narray'
-  require 'cumo/linalg'
   Xumo = Cumo
+  require 'cumo/linalg'
 else
   require 'numo/narray'
+  Xumo = Numo
   # gem `numo-linalg` depends on openblas and lapacke:
   # `sudo apt install libopenblas-base liblapacke`
   require 'numo/linalg'
-  Xumo = Numo
 end
+
+# Shorthands
 NArray = Xumo::DFloat # set a single data type across the WB for now
 NMath = Xumo::NMath # shorthand for extended math module
+NLinalg = Xumo::Linalg # shorthand for linear algebra module
 
 module MachineLearningWorkbench
   module Compressor
 
@@ -24,6 +27,7 @@ module MachineLearningWorkbench
   module Tools
   end
 end
+WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;)
 
 require_relative 'machine_learning_workbench/monkey'
 require_relative 'machine_learning_workbench/tools'
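This hunk reorders the requires so `Xumo` is aliased right after the core narray require, and adds the `NLinalg` shorthand. Since Numo (CPU) and Cumo (GPU) expose the same NArray API, aliasing the backend module keeps the rest of the workbench backend-agnostic. A minimal sketch of the pattern on the CPU path (assumes the `numo-narray` and `numo-linalg` gems are installed):

    require 'numo/narray'
    Xumo = Numo                        # swap in Cumo here for the GPU path
    require 'numo/linalg'
    NArray  = Xumo::DFloat             # the workbench's single data type
    NLinalg = Xumo::Linalg             # new shorthand in 0.5.0
    m = NArray.new([2, 2]).rand(1, 2)  # uniform values in [1,2)
    NLinalg.inv(m)                     # matrix inverse via LAPACK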
data/lib/machine_learning_workbench/compressor/online_vector_quantization.rb
CHANGED

@@ -3,11 +3,14 @@ module MachineLearningWorkbench::Compressor
   # Optimized for online training.
   class OnlineVectorQuantization < VectorQuantization
 
-    attr_reader :
+    attr_reader :lrate_min, :lrate_min_den, :decay_rate
 
-    def initialize
+    def initialize **opts
+      puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate]
+      @lrate_min = opts.delete(:lrate_min) || 0.001
+      @lrate_min_den = opts.delete(:lrate_min_den) || 1
+      @decay_rate = opts.delete(:decay_rate) || 1
       super **opts.merge({lrate: nil})
-      @min_lrate = min_lrate
     end
 
     # Overloading lrate check from original VQ
 
@@ -17,12 +20,17 @@ module MachineLearningWorkbench::Compressor
     # @param centr_idx [Integer] index of the centroid
     # @param lower_bound [Float] minimum learning rate
     # @note nicely overloads the `attr_reader` of parent class
-    def lrate centr_idx, lower_bound:
-      [1/ntrains[centr_idx], lower_bound].max
+    def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate
+      [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max
+        .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" }
     end
 
-
-
+    # Train on one vector
+    # @return [Integer] index of trained centroid
+    def train_one vec
+      trg_idx, _simil = most_similar_centr(vec)
+      centrs[trg_idx] = centrs[trg_idx] * (1-lrate(trg_idx)) + vec * lrate(trg_idx)
+      trg_idx
     end
 
 end
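The new `lrate` schedule decays with how many times a centroid has been trained, floored at `lrate_min`. Note the old `1/ntrains[centr_idx]` was integer division, and raised ZeroDivisionError on untrained centroids (`ntrains` starts at zero); the new form is float-valued and offset by `min_den`. A worked instance with the defaults above, in plain Ruby (illustrative values):

    lrate_min, lrate_min_den, decay_rate = 0.001, 1, 1  # defaults from initialize
    [0, 3, 9].map do |ntrains|                          # training count per centroid
      [1.0 / (ntrains * decay_rate + lrate_min_den), lrate_min].max
    end
    # => [1.0, 0.25, 0.1] -- fresh centroids learn fast, seasoned ones stabilize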
data/lib/machine_learning_workbench/compressor/vector_quantization.rb
CHANGED

@@ -2,15 +2,18 @@ module MachineLearningWorkbench::Compressor
 
   # Standard Vector Quantization
   class VectorQuantization
-    attr_reader :ncentrs, :centrs, :dims, :vrange, :lrate, :rng, :ntrains
+    attr_reader :ncentrs, :centrs, :dims, :vrange, :init_centr_vrange, :lrate, :simil_type, :rng, :ntrains
     Verification = MachineLearningWorkbench::Tools::Verification
 
-    def initialize ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed
+    def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, init_centr_vrange: nil, rseed: Random.new_seed
+      # TODO: RNG CURRENTLY NOT USED!!
       @rng = Random.new rseed
       @ncentrs = ncentrs
       @dims = Array(dims)
       check_lrate lrate # hack: so that we can overload it in online_vq
       @lrate = lrate
+      @simil_type = simil_type || :dot
+      @init_centr_vrange ||= vrange
       @vrange = case vrange
         when Array
           raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
 
@@ -19,30 +22,48 @@ module MachineLearningWorkbench::Compressor
           [vrange.first, vrange.last].map &method(:Float)
         else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
       end
-
+      init_centrs
       @ntrains = [0]*ncentrs # useful to understand what happens
     end
 
     # Verify lrate to be present and withing unit bounds
-    # As a separate method only so it can be overloaded in
+    # As a separate method only so it can be overloaded in `OnlineVectorQuantization`
     def check_lrate lrate
       raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1)
     end
 
+    # Initializes a list of centroids
+    def init_centrs nc: ncentrs, base: nil, proport: nil
+      @centrs = nc.times.map { new_centr base, proport }
+    end
+
     # Creates a new (random) centroid
-
-
+    # If a base is passed, this is meshed with the random centroid.
+    # This is done to facilitate distributing the training across centroids.
+    # TODO: USE RNG HERE!!
+    def new_centr base=nil, proport=nil
+      raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil?
+      # require 'pry'; binding.pry if base.nil? ^ proport.nil?
+      ret = NArray.new(*dims).rand(*init_centr_vrange)
+      ret = ret * (1-proport) + base * proport if base&&proport
+      ret
     end
 
+    SIMIL = {
+      dot: -> (centr, vec) { centr.dot(vec) },
+      mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size }
+    }
+
     # Computes similarities between vector and all centroids
-    def similarities vec
+    def similarities vec, type: simil_type
       raise NotImplementedError if vec.shape.size > 1
-      centrs.map { |
+      centrs.map { |centr| SIMIL[type].call centr, vec }
       # require 'parallel'
       # Parallel.map(centrs) { |c| c.dot(vec).first }
     end
 
     # Encode a vector
+    # TODO: optimize for Numo
     def encode vec, type: :most_similar
       simils = similarities vec
       case type
 
@@ -52,6 +73,7 @@ module MachineLearningWorkbench::Compressor
         simils
       when :ensemble_norm
         tot = simils.reduce(:+)
+        tot = 1 if tot == 0 # HACK: avoid division by zero
         simils.map { |s| s/tot }
       else raise ArgumentError, "unrecognized encode type: #{type}"
       end
 
@@ -83,19 +105,18 @@ module MachineLearningWorkbench::Compressor
 
     # Per-pixel errors in reconstructing vector
     # @return [NArray] residuals
-    def reconstr_error vec
-
+    def reconstr_error vec, code: nil, type: :most_similar
+      code ||= encode vec, type: type
+      (vec - reconstruction(code, type: type)).abs.sum
     end
 
     # Train on one vector
     # @return [Integer] index of trained centroid
     def train_one vec
-
       trg_idx, _simil = most_similar_centr(vec)
-      # note: uhm that actually looks like a dot product...
+      # note: uhm that actually looks like a dot product... maybe faster?
       # `[c[i], vec].dot([1-lrate, lrate])`
       centrs[trg_idx] = centrs[trg_idx] * (1-lrate) + vec * lrate
-      # Verification.in_range! centrs[trg_idx], vrange # I verified it's not needed
       trg_idx
     end
 
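Putting the new options together, a hypothetical usage sketch (parameter values are invented for illustration, not taken from the gem's docs):

    require 'machine_learning_workbench'
    vq = WB::Compressor::VectorQuantization.new(
      ncentrs: 4,        # number of centroids
      dims: [2],         # shape of each centroid / input vector
      vrange: [0, 1],    # admissible value range (also seeds init_centr_vrange)
      lrate: 0.3,        # fixed learning rate
      simil_type: :mse)  # new in 0.5.0: :dot (default) or :mse
    vec = NArray[0.2, 0.8]
    vq.train_one vec                            # => index of the updated centroid
    code = vq.encode vec, type: :ensemble_norm  # similarities normalized to sum 1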
data/lib/machine_learning_workbench/monkey.rb
CHANGED

@@ -231,7 +231,7 @@ module MachineLearningWorkbench::Monkey
     # @param other [NArray] other matrix
     # @return [NArray]
     def outer_flat other
-      # TODO:
+      # TODO: Xumo::NArray should be able to implement this with `#outer` and some other
       # function to flatten the right layer -- much faster
       raise ArgumentError, "Need to pass an operand block" unless block_given?
       self.class.zeros([self.size, other.size]).tap do |ret|
 
@@ -258,7 +258,7 @@ module MachineLearningWorkbench::Monkey
     # Inverses matrix
     # @return [NArray]
     def invert
-
+      NLinalg.inv self
     end
   end
 
@@ -268,7 +268,7 @@ module MachineLearningWorkbench::Monkey
     def exponential
       raise ArgumentError if ndim > 2
       # special case: one-dimensional matrix: just exponentiate the values
-      return
+      return NMath.exp(self) if (ndim == 1) || shape.include?(1)
       # at this point we need to validate it is a square matrix
       raise ArgumentError unless shape.reduce(&:==)
 
@@ -281,11 +281,11 @@ module MachineLearningWorkbench::Monkey
       # TODO: this is a simple but outdated method, switch to Pade approximation
       # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557
 
-      # e_values, l_e_vectors, r_e_vectors_t =
-      evals, _wi, _vl, r_evecs =
+      # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self
+      evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
       r_evecs_t = r_evecs#.transpose
       r_evecs_inv = r_evecs_t.invert
-      evals_exp_dmat =
+      evals_exp_dmat = NMath.exp(evals).diag
 
       # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose
       r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv)
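For reference, the eigendecomposition route in `exponential` computes exp(A) = V diag(exp(w)) V^-1 from the eigenvalues w and eigenvectors V. A minimal sketch restricted to a symmetric matrix, where `eigh` suffices (assumes `numo-narray` and `numo-linalg`; the gem itself calls LAPACK `geev` to cover the general case):

    require 'numo/narray'
    require 'numo/linalg'
    a = Numo::DFloat[[2.0, 1.0], [1.0, 2.0]]
    evals, evecs = Numo::Linalg.eigh(a)  # a = V diag(evals) V^T
    expm = evecs.dot(Numo::NMath.exp(evals).diag).dot(evecs.transpose)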
data/lib/machine_learning_workbench/neural_network/base.rb
CHANGED

@@ -157,7 +157,7 @@ module MachineLearningWorkbench::NeuralNetwork
     # Extract and convert the output layer's activation
     # @return [NArray] the activation of the output layer
     def out
-      state.last
+      state.last.flatten
     end
 
     ## Activation functions
 
@@ -166,13 +166,13 @@ module MachineLearningWorkbench::NeuralNetwork
     def sigmoid k=0.5
       # k is steepness: 0<k<1 is flatter, 1<k is flatter
       # flatter makes activation less sensitive, better with large number of inputs
-      -> (x) { 1.0 / (
+      -> (x) { 1.0 / (NMath.exp(-k * x) + 1.0) }
     end
 
     # Traditional logistic
     def logistic
       -> (x) do
-        exp =
+        exp = NMath.exp(x)
         # exp.infinite? ? exp : exp / (1.0 + exp)
         exp / (1.0 + exp)
       end
 
@@ -181,7 +181,7 @@ module MachineLearningWorkbench::NeuralNetwork
     # LeCun hyperbolic activation
     # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4
     def lecun_hyperbolic
-      -> (x) { 1.7159 *
+      -> (x) { 1.7159 * NMath.tanh(2.0*x/3.0) + 1e-3*x }
     end
 
     # Rectified Linear Unit (ReLU)
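A quick sanity check of the restored sigmoid above (assumes `numo-narray`; `NMath` is the alias set up in machine_learning_workbench.rb):

    require 'numo/narray'
    NMath = Numo::NMath
    k = 0.5
    sigmoid = -> (x) { 1.0 / (NMath.exp(-k * x) + 1.0) }
    sigmoid.call(Numo::DFloat[-2, 0, 2])  # => approx [0.269, 0.5, 0.731]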
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb
CHANGED

@@ -35,7 +35,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
 
     # Box-Muller transform: generates standard (unit) normal distribution samples
     # @return [Float] a single sample from a standard normal distribution
-    # @note
+    # @note Xumo::NArray implements this :) glad to have switched!
     def standard_normal_sample
       rho = Math.sqrt(-2.0 * Math.log(rng.rand))
       theta = 2 * Math::PI * rng.rand
 
@@ -80,7 +80,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     # Samples a standard normal distribution to construct a NArray of
     # popsize multivariate samples of length ndims
     # @return [NArray] standard normal samples
-    # @note
+    # @note Xumo::NArray implements this :) glad to have switched!
     def standard_normal_samples
       NArray.zeros([popsize, ndims]).tap do |ret|
         ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample }
 
@@ -104,7 +104,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization.
     # @return standard normal samples sorted by the respective individuals' fitnesses
     def sorted_inds
-      # samples = standard_normal_samples #
+      # samples = standard_normal_samples # Xumo::NArray implements the Box-Muller :)
      samples = NArray.new([popsize, ndims]).rand_norm(0,1)
      inds = move_inds(samples)
      fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
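The two `@note`s and the comment in `sorted_inds` all point at the same simplification: the hand-rolled Box-Muller sampler is superseded by `Xumo::NArray#rand_norm`, which `sorted_inds` already uses. A one-line sketch (assumes `numo-narray`):

    require 'numo/narray'
    samples = Numo::DFloat.new([20, 5]).rand_norm(0, 1)  # popsize x ndims standard normals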
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/rnes.rb
CHANGED

@@ -28,7 +28,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
     def train picks: sorted_inds
       g_mu = utils.dot(picks)
       # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar
-      row_norms =
+      row_norms = NLinalg.norm picks, 2, axis:1
       g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar
       @mu += sigma.dot(g_mu.transpose).transpose * lrate
       @variance *= Math.exp(g_sigma * lrate / 2)
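`NLinalg.norm` with `axis: 1` computes one Euclidean norm per row, which is exactly what the sigma gradient needs here. A worked instance (assumes `numo-narray` and `numo-linalg`):

    require 'numo/narray'
    require 'numo/linalg'
    picks = Numo::DFloat[[3.0, 4.0], [1.0, 0.0]]
    Numo::Linalg.norm(picks, 2, axis: 1)  # => Numo::DFloat[5, 1]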
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb
CHANGED

@@ -24,7 +24,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
         raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
       end
       # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi)
-      @log_sigma =
+      @log_sigma = NMath.log(sigma.diagonal).diag
     end
 
     def train picks: sorted_inds
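The restored line stores the log of sigma's diagonal as a diagonal matrix. A worked instance with invented values (assumes `numo-narray`):

    require 'numo/narray'
    sigma = Numo::DFloat[[2.0, 0.0], [0.0, 4.0]]
    log_sigma = Numo::NMath.log(sigma.diagonal).diag
    # => [[log 2, 0], [0, log 4]]: element-wise log of the diagonal, re-diagonalized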
data/lib/machine_learning_workbench/tools/imaging.rb
CHANGED

@@ -7,10 +7,11 @@ module MachineLearningWorkbench::Tools
     # @param narr [NArray] numeric matrix to display
     # @param shape [Array<Integer>] optional reshaping
     def self.narr_to_img narr, shape: nil
+      require 'rmagick'
       shape ||= narr.shape
       shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1
       # `Image::constitute` requires Float pixels to be in [0,1]
-      pixels = Norm.feature_scaling narr, to: [0,1]
+      pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1]
       Magick::Image.constitute *shape, "I", pixels.to_a.flatten
     end
 
@@ -28,6 +29,7 @@ module MachineLearningWorkbench::Tools
     # @param shape [Array] the true shape of the image (numeric matrix could be flattened)
     # @param in_fork [bool] whether to execute the display in fork (and continue running)
     def self.display narr, disp_size: nil, shape: nil, in_fork: true
+      require 'rmagick'
       img = narr_to_img narr, shape: shape
       img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size
       if in_fork
 
@@ -43,6 +45,7 @@ module MachineLearningWorkbench::Tools
     # @param flat [bool] whether to return a flat array
     # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection
     def self.narr_from_png fname, scale: nil, flat: false
+      require 'rmagick'
       img = Magick::ImageList.new(fname).first
       img.scale!(scale) if scale
       shape = [img.columns, img.rows]
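All three hunks above move `require 'rmagick'` from file scope into the methods: a lazy-loading pattern, so the gem stays usable without RMagick installed until an imaging helper is actually called (and `require` is idempotent, so repeated calls are cheap). A sketch of the same idea with a hypothetical helper (`HeavyDeps.render` is illustrative, not part of the gem):

    module HeavyDeps
      def self.render pixels
        require 'rmagick'  # loaded on first call only; a missing dependency
                           # surfaces here rather than at gem load time
        Magick::Image.constitute pixels.size, 1, "I", pixels
      end
    end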
data/lib/machine_learning_workbench/tools/normalization.rb
CHANGED

@@ -4,7 +4,10 @@ module MachineLearningWorkbench::Tools
       from ||= narr.minmax
       old_min, old_max = from
       new_min, new_max = to
-      (narr-old_min)*(new_max-new_min)/(old_max-old_min)+new_min
+      ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min
+    rescue ZeroDivisionError
+      # require 'pry'; binding.pry
+      raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`"
     end
 
     # @param per_column [bool] wheather to compute stats per-column or matrix-wise
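`feature_scaling` maps values linearly from `[old_min, old_max]` onto `[new_min, new_max]`; the new rescue turns a ZeroDivisionError from a degenerate `from` range into a descriptive ArgumentError. A worked instance of the formula in plain Ruby (invented values):

    narr = [2.0, 4.0, 6.0]
    old_min, old_max = 2.0, 6.0  # `from`, defaulting to narr.minmax
    new_min, new_max = 0.0, 1.0  # `to`
    narr.map { |x| (x - old_min) * (new_max - new_min) / (old_max - old_min) + new_min }
    # => [0.0, 0.5, 1.0]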
metadata
CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: machine_learning_workbench
 version: !ruby/object:Gem::Version
-  version: 0.4.5
+  version: 0.5.0
 platform: ruby
 authors:
 - Giuseppe Cuccu
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-
+date: 2018-04-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler