machine_learning_workbench 0.4.5 → 0.5.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 59f6c7b552e7165e67222a70e5ceb231bf482d69
- data.tar.gz: '09e9df7a6e045fadec71456f327f1a8118e31a5d'
+ metadata.gz: baafa2eb71e8bbcc83d6a320437c2a0ad8f5544e
+ data.tar.gz: 5337ad91449767ca2754b41b2784212bfe5fbae7
  SHA512:
- metadata.gz: 5eae025849e2640b3137128a912cb54ce6f48fad9c27c63462dc08cd787c1a53527c2bcdcd06f0230bc0e7b1b50d8699f3778878ea7331f00a66bbc7ec114baf
- data.tar.gz: 6aced52a85fd32650bdc88810c596d3305d2f3efeab2034a0ac73e58f3691dda5a4013e89a79546da033cb5ed81833bb090b0b69b7d7ed32e94d46b900bab165
+ metadata.gz: 276d67ea700d5d9a6c0ee450cf0839438c77d7b7ce8dce87173b13a0898d0615cf30cc1f803686cb2da9e76953e601f8e702948853b2332cdffd57f7250ed72f
+ data.tar.gz: 3b9e2fdee7f0435d4acd52b902de928705e021f2e1715b58a09128d69cf129e4aaa7ad8f33d0545a74761d0b2345aa8df6a3ac112b7ebb8c67002431e686ce90
@@ -3,7 +3,6 @@
  # followed by a `bundle install`
  require 'machine_learning_workbench'
  # Workbench shorthands
- WB = MachineLearningWorkbench
  XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
  FFNN = WB::NeuralNetwork::FeedForward

@@ -11,7 +10,7 @@ FFNN = WB::NeuralNetwork::FeedForward
  XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
  # A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
  # For other uses, make sure you match the first number to the number of inputs, and
- # the last one as the number of outputs; then add as many layers as needed, by
+ # the last one to the number of outputs; then add as many layers as needed, by
  # specifying the size of each. Here we have only one, of size 2.
  # NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
  # for speed. In the second case, use the function `nweights_per_layer` when instantiating
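A minimal sketch of how this example might continue, for readers skimming the diff; the constructor and helper names below are illustrative assumptions, not confirmed by this changeset:

  net = FFNN.new [2,2,1]                      # hypothetical signature: layer sizes
  fit = -> (weights) do
    net.load_weights weights                  # hypothetical weight setter
    XOR.sum { |input, trg| -(net.activate(input).first - trg).abs }
  end
  # `fit` would then be handed to XNES as the objective to maximize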
@@ -2,17 +2,20 @@
  gpu = false # prepare for switching to GPUs
  if gpu
    require 'cumo/narray'
-   require 'cumo/linalg'
    Xumo = Cumo
+   require 'cumo/linalg'
  else
    require 'numo/narray'
+   Xumo = Numo
    # gem `numo-linalg` depends on openblas and lapacke:
    # `sudo apt install libopenblas-base liblapacke`
    require 'numo/linalg'
-   Xumo = Numo
  end
+
+ # Shorthands
  NArray = Xumo::DFloat # set a single data type across the WB for now
  NMath = Xumo::NMath # shorthand for extended math module
+ NLinalg = Xumo::Linalg # shorthand for linear algebra module

  module MachineLearningWorkbench
    module Compressor
@@ -24,6 +27,7 @@ module MachineLearningWorkbench
    module Tools
    end
  end
+ WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;)

  require_relative 'machine_learning_workbench/monkey'
  require_relative 'machine_learning_workbench/tools'
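A quick orientation on the shorthands just defined (a sketch; values arbitrary):

  m = NArray[[1.0, 2.0], [3.0, 4.0]]  # NArray is Xumo::DFloat
  NMath.exp(m)                        # element-wise exponential via NMath
  NLinalg.inv(m)                      # matrix inverse via the new NLinalg shorthand
  WB::Compressor                      # the WB alias now ships with the library itself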
@@ -3,11 +3,14 @@ module MachineLearningWorkbench::Compressor
  # Optimized for online training.
  class OnlineVectorQuantization < VectorQuantization

-   attr_reader :min_lrate
+   attr_reader :lrate_min, :lrate_min_den, :decay_rate

-   def initialize min_lrate: 0.01, **opts
+   def initialize **opts
+     puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate]
+     @lrate_min = opts.delete(:lrate_min) || 0.001
+     @lrate_min_den = opts.delete(:lrate_min_den) || 1
+     @decay_rate = opts.delete(:decay_rate) || 1
      super **opts.merge({lrate: nil})
-     @min_lrate = min_lrate
    end

    # Overloading lrate check from original VQ
@@ -17,12 +20,17 @@ module MachineLearningWorkbench::Compressor
    # @param centr_idx [Integer] index of the centroid
    # @param lower_bound [Float] minimum learning rate
    # @note nicely overloads the `attr_reader` of parent class
-   def lrate centr_idx, lower_bound: min_lrate
-     [1/ntrains[centr_idx], lower_bound].max
+   def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate
+     [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max
+       .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" }
    end

-   def train_one *args, **kwargs
-     raise NotImplementedError, "Remember to overload this using the new lrate(idx)"
+   # Train on one vector
+   # @return [Integer] index of trained centroid
+   def train_one vec
+     trg_idx, _simil = most_similar_centr(vec)
+     centrs[trg_idx] = centrs[trg_idx] * (1-lrate(trg_idx)) + vec * lrate(trg_idx)
+     trg_idx
    end

  end
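The new schedule also fixes a subtle bug: the old `1/ntrains[centr_idx]` used integer division, yielding 0 for any centroid trained twice or more (and a ZeroDivisionError for an untrained one). A standalone sketch of the replacement, using the defaults above:

  lrate = -> (ntrains, decay: 1, min_den: 1, lower_bound: 0.001) do
    [1.0/(ntrains*decay + min_den), lower_bound].max
  end
  lrate.(0)     # => 1.0    first training fully overwrites the centroid
  lrate.(3)     # => 0.25   later updates weigh the centroid's history more
  lrate.(9999)  # => 0.001  floored at lrate_min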
@@ -2,15 +2,18 @@ module MachineLearningWorkbench::Compressor

  # Standard Vector Quantization
  class VectorQuantization
-   attr_reader :ncentrs, :centrs, :dims, :vrange, :lrate, :rng, :ntrains
+   attr_reader :ncentrs, :centrs, :dims, :vrange, :init_centr_vrange, :lrate, :simil_type, :rng, :ntrains
    Verification = MachineLearningWorkbench::Tools::Verification

-   def initialize ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed
+   def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, init_centr_vrange: nil, rseed: Random.new_seed
+     # TODO: RNG CURRENTLY NOT USED!!
      @rng = Random.new rseed
      @ncentrs = ncentrs
      @dims = Array(dims)
      check_lrate lrate # hack: so that we can overload it in online_vq
      @lrate = lrate
+     @simil_type = simil_type || :dot
+     @init_centr_vrange ||= vrange
      @vrange = case vrange
      when Array
        raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
@@ -19,30 +22,48 @@ module MachineLearningWorkbench::Compressor
        [vrange.first, vrange.last].map &method(:Float)
      else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
      end
-     @centrs = ncentrs.times.map { new_centr }
+     init_centrs
      @ntrains = [0]*ncentrs # useful to understand what happens
    end

    # Verify lrate to be present and within unit bounds
-   # As a separate method only so it can be overloaded in online_vq
+   # As a separate method only so it can be overloaded in `OnlineVectorQuantization`
    def check_lrate lrate
      raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1)
    end

+   # Initializes a list of centroids
+   def init_centrs nc: ncentrs, base: nil, proport: nil
+     @centrs = nc.times.map { new_centr base, proport }
+   end
+
    # Creates a new (random) centroid
-   def new_centr
-     NArray.new(*dims).rand(*vrange)
+   # If a base is passed, this is meshed with the random centroid.
+   # This is done to facilitate distributing the training across centroids.
+   # TODO: USE RNG HERE!!
+   def new_centr base=nil, proport=nil
+     raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil?
+     # require 'pry'; binding.pry if base.nil? ^ proport.nil?
+     ret = NArray.new(*dims).rand(*init_centr_vrange)
+     ret = ret * (1-proport) + base * proport if base&&proport
+     ret
    end

+   SIMIL = {
+     dot: -> (centr, vec) { centr.dot(vec) },
+     mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size }
+   }
+
    # Computes similarities between vector and all centroids
-   def similarities vec
+   def similarities vec, type: simil_type
      raise NotImplementedError if vec.shape.size > 1
-     centrs.map { |c| c.dot(vec) }
+     centrs.map { |centr| SIMIL[type].call centr, vec }
      # require 'parallel'
      # Parallel.map(centrs) { |c| c.dot(vec).first }
    end

    # Encode a vector
+   # TODO: optimize for Numo
    def encode vec, type: :most_similar
      simils = similarities vec
      case type
@@ -52,6 +73,7 @@ module MachineLearningWorkbench::Compressor
        simils
      when :ensemble_norm
        tot = simils.reduce(:+)
+       tot = 1 if tot == 0 # HACK: avoid division by zero
        simils.map { |s| s/tot }
      else raise ArgumentError, "unrecognized encode type: #{type}"
      end
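The two similarity measures registered in SIMIL, sketched standalone (values arbitrary; assumes numo-narray is installed):

  require 'numo/narray'
  centr = Numo::DFloat[0.0, 1.0]
  vec   = Numo::DFloat[0.5, 0.5]
  centr.dot(vec)                        # => 0.5    :dot, higher means more similar
  -((centr - vec)**2).sum / centr.size  # => -0.25  :mse, negated so higher means more similar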
@@ -83,19 +105,18 @@ module MachineLearningWorkbench::Compressor

    # Per-pixel errors in reconstructing vector
    # @return [NArray] residuals
-   def reconstr_error vec
-     reconstruction(vec) - vec
+   def reconstr_error vec, code: nil, type: :most_similar
+     code ||= encode vec, type: type
+     (vec - reconstruction(code, type: type)).abs.sum
    end

    # Train on one vector
    # @return [Integer] index of trained centroid
    def train_one vec
-
      trg_idx, _simil = most_similar_centr(vec)
-     # note: uhm that actually looks like a dot product... optimizable?
+     # note: uhm that actually looks like a dot product... maybe faster?
      # `[c[i], vec].dot([1-lrate, lrate])`
      centrs[trg_idx] = centrs[trg_idx] * (1-lrate) + vec * lrate
-     # Verification.in_range! centrs[trg_idx], vrange # I verified it's not needed
      trg_idx
    end
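The update rule above is a convex blend (an exponential moving average) of the winning centroid and the incoming vector; a quick standalone illustration:

  lrate = 0.3
  centr = Numo::DFloat[0.0, 1.0]
  vec   = Numo::DFloat[1.0, 0.0]
  centr * (1 - lrate) + vec * lrate  # => [0.3, 0.7], nudged towards vec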
 
@@ -231,7 +231,7 @@ module MachineLearningWorkbench::Monkey
  # @param other [NArray] other matrix
  # @return [NArray]
  def outer_flat other
-   # TODO: Numo::NArray should be able to implement this with `#outer` and some other
+   # TODO: Xumo::NArray should be able to implement this with `#outer` and some other
    # function to flatten the right layer -- much faster
    raise ArgumentError, "Need to pass an operand block" unless block_given?
    self.class.zeros([self.size, other.size]).tap do |ret|
@@ -258,7 +258,7 @@ module MachineLearningWorkbench::Monkey
  # Inverts matrix
  # @return [NArray]
  def invert
-   Numo::Linalg.inv self
+   NLinalg.inv self
  end
  end

@@ -268,7 +268,7 @@ module MachineLearningWorkbench::Monkey
  def exponential
    raise ArgumentError if ndim > 2
    # special case: one-dimensional matrix: just exponentiate the values
-   return Numo::NMath.exp(self) if (ndim == 1) || shape.include?(1)
+   return NMath.exp(self) if (ndim == 1) || shape.include?(1)
    # at this point we need to validate it is a square matrix
    raise ArgumentError unless shape.reduce(&:==)

@@ -281,11 +281,11 @@ module MachineLearningWorkbench::Monkey
    # TODO: this is a simple but outdated method, switch to Pade approximation
    # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557

-   # e_values, l_e_vectors, r_e_vectors_t = Numo::Linalg.svd self
-   evals, _wi, _vl, r_evecs = Numo::Linalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
+   # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self
+   evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
    r_evecs_t = r_evecs#.transpose
    r_evecs_inv = r_evecs_t.invert
-   evals_exp_dmat = Numo::NMath.exp(evals).diag
+   evals_exp_dmat = NMath.exp(evals).diag

    # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose
    r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv)
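For orientation: given the eigendecomposition M = R diag(λ) R⁻¹, the code above computes exp(M) = R diag(e^λ) R⁻¹. A sanity-check sketch, assuming the monkey-patch is loaded:

  m = NArray[[1.0, 0.0], [0.0, 2.0]]
  m.exponential  # => [[2.718..., 0.0], [0.0, 7.389...]], i.e. e and e² on the diagonal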
@@ -157,7 +157,7 @@ module MachineLearningWorkbench::NeuralNetwork
  # Extract and convert the output layer's activation
  # @return [NArray] the activation of the output layer
  def out
-   state.last
+   state.last.flatten
  end

  ## Activation functions
@@ -166,13 +166,13 @@ module MachineLearningWorkbench::NeuralNetwork
  def sigmoid k=0.5
    # k is steepness: 0<k<1 is flatter, 1<k is steeper
    # flatter makes activation less sensitive, better with large number of inputs
-   -> (x) { 1.0 / (Numo::NMath.exp(-k * x) + 1.0) }
+   -> (x) { 1.0 / (NMath.exp(-k * x) + 1.0) }
  end

  # Traditional logistic
  def logistic
    -> (x) do
-     exp = Numo::NMath.exp(x)
+     exp = NMath.exp(x)
      # exp.infinite? ? exp : exp / (1.0 + exp)
      exp / (1.0 + exp)
    end
@@ -181,7 +181,7 @@ module MachineLearningWorkbench::NeuralNetwork
  # LeCun hyperbolic activation
  # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4
  def lecun_hyperbolic
-   -> (x) { 1.7159 * Numo::NMath.tanh(2.0*x/3.0) + 1e-3*x }
+   -> (x) { 1.7159 * NMath.tanh(2.0*x/3.0) + 1e-3*x }
  end

  # Rectified Linear Unit (ReLU)
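A quick numeric check of the activations above (a sketch; assumes the NMath shorthand defined earlier in this diff is loaded):

  x = NArray[-1.0, 0.0, 1.0]
  sigmoid = -> (x) { 1.0 / (NMath.exp(-0.5 * x) + 1.0) }      # k = 0.5
  sigmoid.(x)  # => [0.377..., 0.5, 0.622...]
  lecun = -> (x) { 1.7159 * NMath.tanh(2.0*x/3.0) + 1e-3*x }
  lecun.(x)    # => [-1.001..., 0.0, 1.001...], close to ±1 at x = ±1 by design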
@@ -35,7 +35,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies

  # Box-Muller transform: generates standard (unit) normal distribution samples
  # @return [Float] a single sample from a standard normal distribution
- # @note Numo::NArray implements this :) glad to have switched!
+ # @note Xumo::NArray implements this :) glad to have switched!
  def standard_normal_sample
    rho = Math.sqrt(-2.0 * Math.log(rng.rand))
    theta = 2 * Math::PI * rng.rand
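The hunk cuts off before the method's final expression; the standard Box-Muller completion combines the two draws as follows (assumed here, not shown in the diff):

  rho * Math.cos(theta)  # a single N(0,1) sample from two uniform draws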
@@ -80,7 +80,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
  # Samples a standard normal distribution to construct a NArray of
  # popsize multivariate samples of length ndims
  # @return [NArray] standard normal samples
- # @note Numo::NArray implements this :) glad to have switched!
+ # @note Xumo::NArray implements this :) glad to have switched!
  def standard_normal_samples
    NArray.zeros([popsize, ndims]).tap do |ret|
      ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample }
@@ -104,7 +104,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
  # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization.
  # @return standard normal samples sorted by the respective individuals' fitnesses
  def sorted_inds
-   # samples = standard_normal_samples # Numo::NArray implements the Box-Muller :)
+   # samples = standard_normal_samples # Xumo::NArray implements the Box-Muller :)
    samples = NArray.new([popsize, ndims]).rand_norm(0,1)
    inds = move_inds(samples)
    fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
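The replacement line leans on Numo's built-in normal sampler instead of the hand-rolled Box-Muller loop; standalone:

  samples = Numo::DFloat.new(3, 2).rand_norm(0, 1)  # 3 samples with ndims = 2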
@@ -28,7 +28,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
  def train picks: sorted_inds
    g_mu = utils.dot(picks)
    # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar
-   row_norms = Numo::Linalg.norm picks, 2, axis:1
+   row_norms = NLinalg.norm picks, 2, axis:1
    g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar
    @mu += sigma.dot(g_mu.transpose).transpose * lrate
    @variance *= Math.exp(g_sigma * lrate / 2)
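What the row-norm line computes, standalone (a sketch; values arbitrary):

  picks = Numo::DFloat[[3.0, 4.0], [0.0, 1.0]]
  Numo::Linalg.norm(picks, 2, axis: 1)  # => [5, 1], one L2 norm per sample row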
@@ -24,7 +24,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
  end
  # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi)
- @log_sigma = Numo::NMath.log(sigma.diagonal).diag
+ @log_sigma = NMath.log(sigma.diagonal).diag
  end

  def train picks: sorted_inds
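The log-sigma bookkeeping on a concrete diagonal covariance (a sketch):

  sigma = Numo::DFloat[[4.0, 0.0], [0.0, 9.0]]
  Numo::NMath.log(sigma.diagonal).diag
  # => [[ln 4, 0], [0, ln 9]]; its matrix exponential recovers sigma, so updates
  #    can run in log-space without repeated decompositions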
@@ -7,10 +7,11 @@ module MachineLearningWorkbench::Tools
  # @param narr [NArray] numeric matrix to display
  # @param shape [Array<Integer>] optional reshaping
  def self.narr_to_img narr, shape: nil
+   require 'rmagick'
    shape ||= narr.shape
    shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1
    # `Image::constitute` requires Float pixels to be in [0,1]
-   pixels = Norm.feature_scaling narr, to: [0,1]
+   pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1]
    Magick::Image.constitute *shape, "I", pixels.to_a.flatten
  end

@@ -28,6 +29,7 @@ module MachineLearningWorkbench::Tools
  # @param shape [Array] the true shape of the image (numeric matrix could be flattened)
  # @param in_fork [bool] whether to execute the display in fork (and continue running)
  def self.display narr, disp_size: nil, shape: nil, in_fork: true
+   require 'rmagick'
    img = narr_to_img narr, shape: shape
    img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size
    if in_fork
@@ -43,6 +45,7 @@ module MachineLearningWorkbench::Tools
  # @param flat [bool] whether to return a flat array
  # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection
  def self.narr_from_png fname, scale: nil, flat: false
+   require 'rmagick'
    img = Magick::ImageList.new(fname).first
    img.scale!(scale) if scale
    shape = [img.columns, img.rows]
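The repeated `require 'rmagick'` lines make the dependency lazy: the gem now loads without RMagick installed, and only these imaging helpers demand it. A hypothetical call (the enclosing module name is not shown in the hunks above):

  img = MachineLearningWorkbench::Tools::Imaging.narr_to_img NArray[0.0, 0.5, 1.0, 0.25], shape: [2, 2]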
@@ -4,7 +4,10 @@ module MachineLearningWorkbench::Tools
    from ||= narr.minmax
    old_min, old_max = from
    new_min, new_max = to
-   (narr-old_min)*(new_max-new_min)/(old_max-old_min)+new_min
+   ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min
+ rescue ZeroDivisionError
+   # require 'pry'; binding.pry
+   raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`"
  end

  # @param per_column [bool] whether to compute stats per-column or matrix-wise
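A worked example of the scaling formula, mapping a vector from its own minmax to [0,1] (a sketch):

  narr = Numo::DFloat[2.0, 4.0, 6.0]
  old_min, old_max = narr.minmax
  ((narr - old_min) * (1.0 - 0.0) / (old_max - old_min)) + 0.0  # => [0.0, 0.5, 1.0]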
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: machine_learning_workbench
  version: !ruby/object:Gem::Version
- version: 0.4.5
+ version: 0.5.0
  platform: ruby
  authors:
  - Giuseppe Cuccu
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-27 00:00:00.000000000 Z
+ date: 2018-04-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler