machine_learning_workbench 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 59f6c7b552e7165e67222a70e5ceb231bf482d69
-   data.tar.gz: '09e9df7a6e045fadec71456f327f1a8118e31a5d'
+   metadata.gz: baafa2eb71e8bbcc83d6a320437c2a0ad8f5544e
+   data.tar.gz: 5337ad91449767ca2754b41b2784212bfe5fbae7
  SHA512:
-   metadata.gz: 5eae025849e2640b3137128a912cb54ce6f48fad9c27c63462dc08cd787c1a53527c2bcdcd06f0230bc0e7b1b50d8699f3778878ea7331f00a66bbc7ec114baf
-   data.tar.gz: 6aced52a85fd32650bdc88810c596d3305d2f3efeab2034a0ac73e58f3691dda5a4013e89a79546da033cb5ed81833bb090b0b69b7d7ed32e94d46b900bab165
+   metadata.gz: 276d67ea700d5d9a6c0ee450cf0839438c77d7b7ce8dce87173b13a0898d0615cf30cc1f803686cb2da9e76953e601f8e702948853b2332cdffd57f7250ed72f
+   data.tar.gz: 3b9e2fdee7f0435d4acd52b902de928705e021f2e1715b58a09128d69cf129e4aaa7ad8f33d0545a74761d0b2345aa8df6a3ac112b7ebb8c67002431e686ce90
@@ -3,7 +3,6 @@
  # followed by a `bundle install`
  require 'machine_learning_workbench'
  # Workbench shorthands
- WB = MachineLearningWorkbench
  XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
  FFNN = WB::NeuralNetwork::FeedForward
 
@@ -11,7 +10,7 @@ FFNN = WB::NeuralNetwork::FeedForward
  XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
  # A classic [2,2,1] feed-forward network will do: 2 inputs, 2 hidden, 1 output
  # For other uses, make sure you match the first number to the number of inputs, and
- # the last one as the number of outputs; then add as many layers as needed, by
+ # the last one as the number of outputs; then add as many layers as needed, by
  # specifying the size of each. Here we have only one, of size 2.
  # NOTE: If this totals thousands of weights, you may want to switch to SNES or BDNES
  # for speed. In the second case, use the function `nweights_per_layer` when instantiating
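To see where these shorthands lead, here is a minimal sketch of wiring the XOR table to a [2,2,1] network and the XNES optimizer. Only `WB`, `XNES`, `FFNN`, `XOR`, and `#train` appear in this diff; the constructor arguments and the `nweights`, `load_weights`, and `activate` calls are assumptions about the gem's API, not lines from this release:

```ruby
require 'machine_learning_workbench'
XNES = WB::Optimizer::NaturalEvolutionStrategies::XNES
FFNN = WB::NeuralNetwork::FeedForward

XOR = {[0,0] => 0, [1,0] => 1, [0,1] => 1, [1,1] => 0}
net = FFNN.new [2,2,1] # 2 inputs, 2 hidden, 1 output

# Fitness: negated total error over the truth table (higher is better)
fit = -> (weights) do
  net.load_weights weights
  -XOR.sum { |input, target| (net.activate(input)[0] - target).abs }
end

nes = XNES.new net.nweights, fit, :max
50.times { nes.train }
```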
@@ -2,17 +2,20 @@
  gpu = false # prepare for switching to GPUs
  if gpu
    require 'cumo/narray'
-   require 'cumo/linalg'
    Xumo = Cumo
+   require 'cumo/linalg'
  else
    require 'numo/narray'
+   Xumo = Numo
    # gem `numo-linalg` depends on openblas and lapacke:
    # `sudo apt install libopenblas-base liblapacke`
    require 'numo/linalg'
-   Xumo = Numo
  end
+
+ # Shorthands
  NArray = Xumo::DFloat # set a single data type across the WB for now
  NMath = Xumo::NMath # shorthand for extended math module
+ NLinalg = Xumo::Linalg # shorthand for linear algebra module
 
  module MachineLearningWorkbench
    module Compressor
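The point of the `Xumo` indirection is that everything downstream names only `Xumo`, `NArray`, `NMath`, and now `NLinalg`, so the CPU/GPU choice becomes a one-line switch. A minimal sketch of the same pattern, CPU side only (assumes `numo-narray` is installed):

```ruby
require 'numo/narray'
Xumo = Numo # flip to `Cumo` (plus its requires) for the GPU backend

NArray = Xumo::DFloat # one float type across the codebase
NMath  = Xumo::NMath

# Downstream code never mentions Numo or Cumo directly
m = NArray.new(2, 2).rand # uniform samples in [0, 1)
puts NMath.exp(m).inspect # element-wise exponential
```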
@@ -24,6 +27,7 @@ module MachineLearningWorkbench
    module Tools
    end
  end
+ WB = MachineLearningWorkbench # import MachineLearningWorkbench as WB ;)
 
  require_relative 'machine_learning_workbench/monkey'
  require_relative 'machine_learning_workbench/tools'
@@ -3,11 +3,14 @@ module MachineLearningWorkbench::Compressor
  # Optimized for online training.
  class OnlineVectorQuantization < VectorQuantization
 
-   attr_reader :min_lrate
+   attr_reader :lrate_min, :lrate_min_den, :decay_rate
 
-   def initialize min_lrate: 0.01, **opts
+   def initialize **opts
+     puts "Ignoring learning rate: `lrate: #{opts[:lrate]}`" if opts[:lrate]
+     @lrate_min = opts.delete(:lrate_min) || 0.001
+     @lrate_min_den = opts.delete(:lrate_min_den) || 1
+     @decay_rate = opts.delete(:decay_rate) || 1
      super **opts.merge({lrate: nil})
-     @min_lrate = min_lrate
    end
 
    # Overloading lrate check from original VQ
@@ -17,12 +20,17 @@ module MachineLearningWorkbench::Compressor
    # @param centr_idx [Integer] index of the centroid
    # @param lower_bound [Float] minimum learning rate
    # @note nicely overloads the `attr_reader` of parent class
-   def lrate centr_idx, lower_bound: min_lrate
-     [1/ntrains[centr_idx], lower_bound].max
+   def lrate centr_idx, min_den: lrate_min_den, lower_bound: lrate_min, decay: decay_rate
+     [1.0/(ntrains[centr_idx]*decay+min_den), lower_bound].max
+       .tap { |l| puts "centr: #{centr_idx}, ntrains: #{ntrains[centr_idx]}, lrate: #{l}" }
    end
 
-   def train_one *args, **kwargs
-     raise NotImplementedError, "Remember to overload this using the new lrate(idx)"
+   # Train on one vector
+   # @return [Integer] index of trained centroid
+   def train_one vec
+     trg_idx, _simil = most_similar_centr(vec)
+     centrs[trg_idx] = centrs[trg_idx] * (1-lrate(trg_idx)) + vec * lrate(trg_idx)
+     trg_idx
    end
 
  end
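The rewritten `lrate` is a harmonic decay per centroid: `max(1.0/(ntrains*decay + min_den), lrate_min)`, so a centroid's first update counts fully and later updates shrink toward the `lrate_min` floor. A plain-Ruby sketch of the schedule with the defaults above:

```ruby
lrate_min     = 0.001 # default lower bound
lrate_min_den = 1     # default denominator offset
decay_rate    = 1     # default decay multiplier

lrate = -> (ntrains) do
  [1.0 / (ntrains * decay_rate + lrate_min_den), lrate_min].max
end

[0, 1, 9, 99, 1999].each { |n| puts "ntrains=#{n} lrate=#{lrate.(n)}" }
# => 1.0, 0.5, 0.1, 0.01, then 0.001 (the floor kicks in once 1/(n+1) < lrate_min)
```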
@@ -2,15 +2,18 @@ module MachineLearningWorkbench::Compressor
 
  # Standard Vector Quantization
  class VectorQuantization
-   attr_reader :ncentrs, :centrs, :dims, :vrange, :lrate, :rng, :ntrains
+   attr_reader :ncentrs, :centrs, :dims, :vrange, :init_centr_vrange, :lrate, :simil_type, :rng, :ntrains
    Verification = MachineLearningWorkbench::Tools::Verification
 
-   def initialize ncentrs:, dims:, vrange:, lrate:, rseed: Random.new_seed
+   def initialize ncentrs:, dims:, vrange:, lrate:, simil_type: nil, init_centr_vrange: nil, rseed: Random.new_seed
+     # TODO: RNG CURRENTLY NOT USED!!
      @rng = Random.new rseed
      @ncentrs = ncentrs
      @dims = Array(dims)
      check_lrate lrate # hack: so that we can overload it in online_vq
      @lrate = lrate
+     @simil_type = simil_type || :dot
+     @init_centr_vrange ||= vrange
      @vrange = case vrange
        when Array
          raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
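Since the new keyword signature is fully visible above, instantiation at 0.5.0 looks as follows (values are arbitrary; `WB` is the alias the library now defines):

```ruby
require 'machine_learning_workbench'

vq = WB::Compressor::VectorQuantization.new(
  ncentrs: 4,       # number of centroids
  dims: [8, 8],     # shape of each centroid
  vrange: [0, 1],   # value range, also the default init_centr_vrange
  lrate: 0.3,       # fixed learning rate, between 0 and 1
  simil_type: :mse) # new in 0.5.0; defaults to :dot
```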
@@ -19,30 +22,48 @@ module MachineLearningWorkbench::Compressor
          [vrange.first, vrange.last].map &method(:Float)
        else raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
      end
-     @centrs = ncentrs.times.map { new_centr }
+     init_centrs
      @ntrains = [0]*ncentrs # useful to understand what happens
    end
 
    # Verify lrate to be present and withing unit bounds
-   # As a separate method only so it can be overloaded in online_vq
+   # As a separate method only so it can be overloaded in `OnlineVectorQuantization`
    def check_lrate lrate
      raise ArgumentError, "Pass a `lrate` between 0 and 1" unless lrate&.between?(0,1)
    end
 
+   # Initializes a list of centroids
+   def init_centrs nc: ncentrs, base: nil, proport: nil
+     @centrs = nc.times.map { new_centr base, proport }
+   end
+
    # Creates a new (random) centroid
-   def new_centr
-     NArray.new(*dims).rand(*vrange)
+   # If a base is passed, this is meshed with the random centroid.
+   # This is done to facilitate distributing the training across centroids.
+   # TODO: USE RNG HERE!!
+   def new_centr base=nil, proport=nil
+     raise ArgumentError, "Either both or none" if base.nil? ^ proport.nil?
+     # require 'pry'; binding.pry if base.nil? ^ proport.nil?
+     ret = NArray.new(*dims).rand(*init_centr_vrange)
+     ret = ret * (1-proport) + base * proport if base&&proport
+     ret
    end
 
+   SIMIL = {
+     dot: -> (centr, vec) { centr.dot(vec) },
+     mse: -> (centr, vec) { -((centr-vec)**2).sum / centr.size }
+   }
+
    # Computes similarities between vector and all centroids
-   def similarities vec
+   def similarities vec, type: simil_type
      raise NotImplementedError if vec.shape.size > 1
-     centrs.map { |c| c.dot(vec) }
+     centrs.map { |centr| SIMIL[type].call centr, vec }
      # require 'parallel'
      # Parallel.map(centrs) { |c| c.dot(vec).first }
    end
 
    # Encode a vector
+   # TODO: optimize for Numo
    def encode vec, type: :most_similar
      simils = similarities vec
      case type
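The `SIMIL` table makes the similarity measure pluggable: `:dot` keeps the old inner-product behavior, while `:mse` returns the negated mean squared error so that higher still means more similar (0 for a perfect match). The same two lambdas, standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

simil = {
  dot: -> (centr, vec) { centr.dot(vec) },
  mse: -> (centr, vec) { -((centr - vec)**2).sum / centr.size }
}

centr = Numo::DFloat[0.2, 0.8, 0.5]
vec   = Numo::DFloat[0.1, 0.9, 0.4]

puts simil[:dot].call(centr, vec) # ≈ 0.94
puts simil[:mse].call(centr, vec) # ≈ -0.01
```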
@@ -52,6 +73,7 @@ module MachineLearningWorkbench::Compressor
        simils
      when :ensemble_norm
        tot = simils.reduce(:+)
+       tot = 1 if tot == 0 # HACK: avoid division by zero
        simils.map { |s| s/tot }
      else raise ArgumentError, "unrecognized encode type: #{type}"
      end
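The guard exists because the similarities can legitimately sum to zero (e.g. `:dot` similarities against a zero vector); dividing by 1 instead returns zeros rather than NaNs. In isolation:

```ruby
simils = [0.0, 0.0, 0.0]     # e.g. `:dot` against a zero vector
tot = simils.reduce(:+)
tot = 1 if tot == 0          # the HACK above
p simils.map { |s| s / tot } # => [0.0, 0.0, 0.0] instead of NaNs
```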
@@ -83,19 +105,18 @@ module MachineLearningWorkbench::Compressor
 
    # Per-pixel errors in reconstructing vector
    # @return [NArray] residuals
-   def reconstr_error vec
-     reconstruction(vec) - vec
+   def reconstr_error vec, code: nil, type: :most_similar
+     code ||= encode vec, type: type
+     (vec - reconstruction(code, type: type)).abs.sum
    end
 
    # Train on one vector
    # @return [Integer] index of trained centroid
    def train_one vec
-
      trg_idx, _simil = most_similar_centr(vec)
-     # note: uhm that actually looks like a dot product... optimizable?
+     # note: uhm that actually looks like a dot product... maybe faster?
      # `[c[i], vec].dot([1-lrate, lrate])`
      centrs[trg_idx] = centrs[trg_idx] * (1-lrate) + vec * lrate
-     # Verification.in_range! centrs[trg_idx], vrange # I verified it's not needed
      trg_idx
    end
 
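The training update is a convex blend: the winning centroid moves a fraction `lrate` of the way toward the training vector (an exponential moving average when applied repeatedly). Worked out on small numbers:

```ruby
require 'numo/narray'

lrate = 0.3
centr = Numo::DFloat[0.0, 1.0]
vec   = Numo::DFloat[1.0, 0.0]

centr = centr * (1 - lrate) + vec * lrate
p centr.to_a # => [0.3, 0.7]
```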
@@ -231,7 +231,7 @@ module MachineLearningWorkbench::Monkey
    # @param other [NArray] other matrix
    # @return [NArray]
    def outer_flat other
-     # TODO: Numo::NArray should be able to implement this with `#outer` and some other
+     # TODO: Xumo::NArray should be able to implement this with `#outer` and some other
      # function to flatten the right layer -- much faster
      raise ArgumentError, "Need to pass an operand block" unless block_given?
      self.class.zeros([self.size, other.size]).tap do |ret|
@@ -258,7 +258,7 @@ module MachineLearningWorkbench::Monkey
    # Inverses matrix
    # @return [NArray]
    def invert
-     Numo::Linalg.inv self
+     NLinalg.inv self
    end
  end
 
@@ -268,7 +268,7 @@ module MachineLearningWorkbench::Monkey
    def exponential
      raise ArgumentError if ndim > 2
      # special case: one-dimensional matrix: just exponentiate the values
-     return Numo::NMath.exp(self) if (ndim == 1) || shape.include?(1)
+     return NMath.exp(self) if (ndim == 1) || shape.include?(1)
      # at this point we need to validate it is a square matrix
      raise ArgumentError unless shape.reduce(&:==)
 
@@ -281,11 +281,11 @@ module MachineLearningWorkbench::Monkey
      # TODO: this is a simple but outdated method, switch to Pade approximation
      # https://github.com/scipy/scipy/blob/11509c4a98edded6c59423ac44ca1b7f28fba1fd/scipy/sparse/linalg/matfuncs.py#L557
 
-     # e_values, l_e_vectors, r_e_vectors_t = Numo::Linalg.svd self
-     evals, _wi, _vl, r_evecs = Numo::Linalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
+     # e_values, l_e_vectors, r_e_vectors_t = NLinalg.svd self
+     evals, _wi, _vl, r_evecs = NLinalg::Lapack.call(:geev, self, jobvl: false, jobvr: true)
      r_evecs_t = r_evecs#.transpose
      r_evecs_inv = r_evecs_t.invert
-     evals_exp_dmat = Numo::NMath.exp(evals).diag
+     evals_exp_dmat = NMath.exp(evals).diag
 
      # l_e_vectors.dot(e_vals_exp_dmat).dot(l_e_vectors.invert)#.transpose
      r_evecs_t.dot(evals_exp_dmat).dot(r_evecs_inv)
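What this hunk computes is the classic eigendecomposition route to the matrix exponential: if A = V·diag(λ)·V⁻¹ then exp(A) = V·diag(e^λ)·V⁻¹. A sanity-check sketch on a matrix that is already diagonal, reusing the same `Lapack.call(:geev, ...)` and `diag` calls as above (assumes `numo-linalg`); the result must carry e^λ on the diagonal:

```ruby
require 'numo/narray'
require 'numo/linalg'

a = Numo::DFloat[[1, 0], [0, 2]]
evals, _wi, _vl, r_evecs =
  Numo::Linalg::Lapack.call(:geev, a, jobvl: false, jobvr: true)
exp_a = r_evecs.dot(Numo::NMath.exp(evals).diag)
               .dot(Numo::Linalg.inv(r_evecs))
p exp_a.to_a # ≈ [[2.718, 0.0], [0.0, 7.389]]
```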
@@ -157,7 +157,7 @@ module MachineLearningWorkbench::NeuralNetwork
    # Extract and convert the output layer's activation
    # @return [NArray] the activation of the output layer
    def out
-     state.last
+     state.last.flatten
    end
 
    ## Activation functions
@@ -166,13 +166,13 @@ module MachineLearningWorkbench::NeuralNetwork
    def sigmoid k=0.5
      # k is steepness: 0<k<1 is flatter, 1<k is flatter
      # flatter makes activation less sensitive, better with large number of inputs
-     -> (x) { 1.0 / (Numo::NMath.exp(-k * x) + 1.0) }
+     -> (x) { 1.0 / (NMath.exp(-k * x) + 1.0) }
    end
 
    # Traditional logistic
    def logistic
      -> (x) do
-       exp = Numo::NMath.exp(x)
+       exp = NMath.exp(x)
        # exp.infinite? ? exp : exp / (1.0 + exp)
        exp / (1.0 + exp)
      end
@@ -181,7 +181,7 @@ module MachineLearningWorkbench::NeuralNetwork
    # LeCun hyperbolic activation
    # @see http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf Section 4.4
    def lecun_hyperbolic
-     -> (x) { 1.7159 * Numo::NMath.tanh(2.0*x/3.0) + 1e-3*x }
+     -> (x) { 1.7159 * NMath.tanh(2.0*x/3.0) + 1e-3*x }
    end
 
    # Rectified Linear Unit (ReLU)
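Each activation is a lambda over a whole NArray, so one call maps an entire layer at once. The same formulas evaluated standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

k = 0.5
sigmoid = -> (x) { 1.0 / (Numo::NMath.exp(-k * x) + 1.0) }
lecun   = -> (x) { 1.7159 * Numo::NMath.tanh(2.0 * x / 3.0) + 1e-3 * x }

x = Numo::DFloat[-2, 0, 2]
p sigmoid.(x).to_a # ≈ [0.269, 0.5, 0.731]
p lecun.(x).to_a   # ≈ [-1.495, 0.0, 1.495]
```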
@@ -35,7 +35,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
 
    # Box-Muller transform: generates standard (unit) normal distribution samples
    # @return [Float] a single sample from a standard normal distribution
-   # @note Numo::NArray implements this :) glad to have switched!
+   # @note Xumo::NArray implements this :) glad to have switched!
    def standard_normal_sample
      rho = Math.sqrt(-2.0 * Math.log(rng.rand))
      theta = 2 * Math::PI * rng.rand
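The hunk cuts off before the method's final expression, but the Box-Muller transform itself is standard: two independent uniforms give a radius rho = sqrt(-2 ln u1) and an angle theta = 2π·u2, and rho·cos(theta) is distributed as a standard normal. A self-contained version (the closing line is the textbook formula, not necessarily the gem's verbatim code):

```ruby
rng = Random.new(42)

box_muller = -> do
  rho   = Math.sqrt(-2.0 * Math.log(rng.rand))
  theta = 2 * Math::PI * rng.rand
  rho * Math.cos(theta) # a single standard normal sample
end

samples = Array.new(10_000) { box_muller.() }
mean = samples.sum / samples.size
puts "mean ≈ #{mean.round(3)}" # close to 0
```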
@@ -80,7 +80,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    # Samples a standard normal distribution to construct a NArray of
    # popsize multivariate samples of length ndims
    # @return [NArray] standard normal samples
-   # @note Numo::NArray implements this :) glad to have switched!
+   # @note Xumo::NArray implements this :) glad to have switched!
    def standard_normal_samples
      NArray.zeros([popsize, ndims]).tap do |ret|
        ret.each_with_index { |_,*i| ret[*i] = standard_normal_sample }
@@ -104,7 +104,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    # matched with individuals sorted by INCREASING fitness. Then reverse order for minimization.
    # @return standard normal samples sorted by the respective individuals' fitnesses
    def sorted_inds
-     # samples = standard_normal_samples # Numo::NArray implements the Box-Muller :)
+     # samples = standard_normal_samples # Xumo::NArray implements the Box-Muller :)
      samples = NArray.new([popsize, ndims]).rand_norm(0,1)
      inds = move_inds(samples)
      fits = parallel_fit ? obj_fn.call(inds) : inds.map(&obj_fn)
@@ -28,7 +28,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
    def train picks: sorted_inds
      g_mu = utils.dot(picks)
      # g_sigma = utils.dot(picks.row_norms**2 - ndims).first # back to scalar
-     row_norms = Numo::Linalg.norm picks, 2, axis:1
+     row_norms = NLinalg.norm picks, 2, axis:1
      g_sigma = utils.dot(row_norms**2 - ndims)[0] # back to scalar
      @mu += sigma.dot(g_mu.transpose).transpose * lrate
      @variance *= Math.exp(g_sigma * lrate / 2)
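`NLinalg.norm` with `axis: 1` returns one Euclidean norm per row, which is what the sigma gradient needs (`row_norms**2` is then the squared length of each sample). The call in isolation (assumes `numo-linalg`):

```ruby
require 'numo/narray'
require 'numo/linalg'

picks = Numo::DFloat[[3, 4],
                     [0, 1]]
row_norms = Numo::Linalg.norm picks, 2, axis: 1
p row_norms.to_a # => [5.0, 1.0]
```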
@@ -24,7 +24,7 @@ module MachineLearningWorkbench::Optimizer::NaturalEvolutionStrategies
        raise ArgumentError, "Something is wrong with sigma_init: #{sigma_init}"
      end
      # Works with the log of sigma to avoid continuous decompositions (thanks Sun Yi)
-     @log_sigma = Numo::NMath.log(sigma.diagonal).diag
+     @log_sigma = NMath.log(sigma.diagonal).diag
    end
 
    def train picks: sorted_inds
@@ -7,10 +7,11 @@ module MachineLearningWorkbench::Tools
    # @param narr [NArray] numeric matrix to display
    # @param shape [Array<Integer>] optional reshaping
    def self.narr_to_img narr, shape: nil
+     require 'rmagick'
      shape ||= narr.shape
      shape = [1, shape] if shape.kind_of?(Integer) || shape.size == 1
      # `Image::constitute` requires Float pixels to be in [0,1]
-     pixels = Norm.feature_scaling narr, to: [0,1]
+     pixels = Norm.feature_scaling narr.cast_to(NArray), to: [0,1]
      Magick::Image.constitute *shape, "I", pixels.to_a.flatten
    end
 
@@ -28,6 +29,7 @@ module MachineLearningWorkbench::Tools
    # @param shape [Array] the true shape of the image (numeric matrix could be flattened)
    # @param in_fork [bool] whether to execute the display in fork (and continue running)
    def self.display narr, disp_size: nil, shape: nil, in_fork: true
+     require 'rmagick'
      img = narr_to_img narr, shape: shape
      img.resize!(*disp_size, Magick::TriangleFilter,0.51) if disp_size
      if in_fork
@@ -43,6 +45,7 @@ module MachineLearningWorkbench::Tools
    # @param flat [bool] whether to return a flat array
    # @param dtype dtype for the numeric matrix, leave `nil` for automatic detection
    def self.narr_from_png fname, scale: nil, flat: false
+     require 'rmagick'
      img = Magick::ImageList.new(fname).first
      img.scale!(scale) if scale
      shape = [img.columns, img.rows]
@@ -4,7 +4,10 @@ module MachineLearningWorkbench::Tools
      from ||= narr.minmax
      old_min, old_max = from
      new_min, new_max = to
-     (narr-old_min)*(new_max-new_min)/(old_max-old_min)+new_min
+     ( (narr-old_min)*(new_max-new_min)/(old_max-old_min) ) + new_min
+   rescue ZeroDivisionError
+     # require 'pry'; binding.pry
+     raise ArgumentError, "If you get here, chances are there's a bug in `from` or `to`"
    end
 
    # @param per_column [bool] wheather to compute stats per-column or matrix-wise
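`feature_scaling` is plain min-max normalization, x' = (x - min)·(new_max - new_min)/(max - min) + new_min; the new parentheses only make the precedence explicit. The formula standalone (assumes `numo-narray`):

```ruby
require 'numo/narray'

narr = Numo::DFloat[2, 4, 6, 10]
old_min, old_max = narr.minmax
new_min, new_max = 0.0, 1.0

scaled = (narr - old_min) * (new_max - new_min) / (old_max - old_min) + new_min
p scaled.to_a # => [0.0, 0.25, 0.5, 1.0]
```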
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: machine_learning_workbench
  version: !ruby/object:Gem::Version
-   version: 0.4.5
+   version: 0.5.0
  platform: ruby
  authors:
  - Giuseppe Cuccu
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-03-27 00:00:00.000000000 Z
+ date: 2018-04-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler