machine_learning_workbench 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +15 -0
  3. data/.gitignore +11 -0
  4. data/.rspec +3 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +70 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +37 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/lib/machine_learning_workbench.rb +19 -0
  14. data/lib/machine_learning_workbench/compressor.rb +1 -0
  15. data/lib/machine_learning_workbench/compressor/vector_quantization.rb +74 -0
  16. data/lib/machine_learning_workbench/monkey.rb +197 -0
  17. data/lib/machine_learning_workbench/neural_network.rb +3 -0
  18. data/lib/machine_learning_workbench/neural_network/base.rb +211 -0
  19. data/lib/machine_learning_workbench/neural_network/feed_forward.rb +20 -0
  20. data/lib/machine_learning_workbench/neural_network/recurrent.rb +35 -0
  21. data/lib/machine_learning_workbench/optimizer.rb +7 -0
  22. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +112 -0
  23. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb +104 -0
  24. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb +40 -0
  25. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb +46 -0
  26. data/lib/machine_learning_workbench/tools.rb +4 -0
  27. data/lib/machine_learning_workbench/tools/execution.rb +18 -0
  28. data/lib/machine_learning_workbench/tools/imaging.rb +48 -0
  29. data/lib/machine_learning_workbench/tools/normalization.rb +22 -0
  30. data/lib/machine_learning_workbench/tools/verification.rb +11 -0
  31. data/machine_learning_workbench.gemspec +36 -0
  32. metadata +216 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f8075c35b1a57c76c34e17ea94d9a2ff95bb5c1
4
+ data.tar.gz: c89aca6335ae3d3b15b08d8c0b0cb06f348d78cd
5
+ SHA512:
6
+ metadata.gz: 7b6f1245dc746fe149cbf25a66f486482454a4a0b4e756cc0bbf66e7a65e90158cdeb054696311f267cf1054dabbe9fabc51ce2a6fffb76eb61d5febf51ec723
7
+ data.tar.gz: c111a7b0ada4aa24c3ad996f23791c28e1a37c1be4978ceecd694c07d4199fc3c9ec7a9ac7f4839eb1e81f84ef805d76df90038a9bd3f22e6510812086fa867e
data/.codeclimate.yml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ engines:
3
+ rubocop:
4
+ enabled: true
5
+ duplication:
6
+ enabled: true
7
+ config:
8
+ languages:
9
+ - ruby
10
+ ratings:
11
+ paths:
12
+ - lib/**
13
+ - "**.rb"
14
+ exclude_paths:
15
+ - spec/**
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in machine_learning_workbench.gemspec
6
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ machine_learning_workbench (0.0.0)
5
+ nmatrix-atlas (~> 0.2)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ backports (3.11.1)
11
+ binding_of_caller (0.8.0)
12
+ debug_inspector (>= 0.0.1)
13
+ coderay (1.1.2)
14
+ debug_inspector (0.0.3)
15
+ diff-lcs (1.3)
16
+ interception (0.5)
17
+ method_source (0.8.2)
18
+ nmatrix (0.2.4)
19
+ packable (~> 1.3, >= 1.3.5)
20
+ nmatrix-atlas (0.2.4)
21
+ nmatrix (= 0.2.4)
22
+ packable (1.3.9)
23
+ backports
24
+ parallel (1.12.1)
25
+ pry (0.10.4)
26
+ coderay (~> 1.1.0)
27
+ method_source (~> 0.8.1)
28
+ slop (~> 3.4)
29
+ pry-nav (0.2.4)
30
+ pry (>= 0.9.10, < 0.11.0)
31
+ pry-rescue (1.4.5)
32
+ interception (>= 0.5)
33
+ pry
34
+ pry-stack_explorer (0.4.9.2)
35
+ binding_of_caller (>= 0.7)
36
+ pry (>= 0.9.11)
37
+ rake (10.5.0)
38
+ rmagick (2.16.0)
39
+ rspec (3.7.0)
40
+ rspec-core (~> 3.7.0)
41
+ rspec-expectations (~> 3.7.0)
42
+ rspec-mocks (~> 3.7.0)
43
+ rspec-core (3.7.1)
44
+ rspec-support (~> 3.7.0)
45
+ rspec-expectations (3.7.0)
46
+ diff-lcs (>= 1.2.0, < 2.0)
47
+ rspec-support (~> 3.7.0)
48
+ rspec-mocks (3.7.0)
49
+ diff-lcs (>= 1.2.0, < 2.0)
50
+ rspec-support (~> 3.7.0)
51
+ rspec-support (3.7.1)
52
+ slop (3.6.0)
53
+
54
+ PLATFORMS
55
+ ruby
56
+
57
+ DEPENDENCIES
58
+ bundler (~> 1.16)
59
+ machine_learning_workbench!
60
+ parallel
61
+ pry (~> 0.10)
62
+ pry-nav (~> 0.2)
63
+ pry-rescue (~> 1.4)
64
+ pry-stack_explorer (~> 0.4)
65
+ rake (~> 10.0)
66
+ rmagick
67
+ rspec (~> 3.0)
68
+
69
+ BUNDLED WITH
70
+ 1.16.1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Giuseppe Cuccu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # Machine Learning Workbench
2
+
3
+ This workbench holds a collection of machine learning methods in Ruby. Rather than specializing in a single task or method, this gem aims to provide an encompassing framework for any machine learning application.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'machine_learning_workbench'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install machine_learning_workbench
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Development
26
+
27
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
28
+
29
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/machine_learning_workbench.
34
+
35
+ ## License
36
+
37
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "machine_learning_workbench"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+ Pry.start
12
+
13
+ # require "irb"
14
+ # IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,19 @@
1
+ require 'nmatrix'
2
+
3
# Root namespace of the gem.
#
# The sub-namespaces are declared (empty) up front so that the files required
# afterwards can reopen them with the compact
# `module MachineLearningWorkbench::Name` syntax, which needs the constant
# to exist already.
module MachineLearningWorkbench
  module Compressor
  end

  module NeuralNetwork
  end

  module Optimizer
  end

  # Must live inside the root namespace: library code refers to it as
  # `MachineLearningWorkbench::Tools`.
  module Tools
  end
end
14
+
15
+ require_relative 'machine_learning_workbench/monkey'
16
+ require_relative 'machine_learning_workbench/tools'
17
+ require_relative 'machine_learning_workbench/compressor'
18
+ require_relative 'machine_learning_workbench/neural_network'
19
+ require_relative 'machine_learning_workbench/optimizer'
@@ -0,0 +1 @@
1
+ require_relative 'compressor/vector_quantization'
@@ -0,0 +1,74 @@
1
+ module MachineLearningWorkbench::Compressor
2
+ class VectorQuantization
3
+ attr_reader :ncentrs, :centrs, :dims, :vrange, :dtype, :lrate, :rng
4
+ Verification = MachineLearningWorkbench::Tools::Verification
5
+
6
# Sets up the quantizer and draws the initial random centroids.
# @param ncentrs [Integer] number of centroids to create
# @param dims shape handed to the centroid constructor
# @param vrange [Array, Range] lower and upper bound for centroid values;
#   stored as a two-element Array of Floats
# @param dtype data type handed to the centroid constructor
# @param lrate learning rate used when training
# @param rseed seed for the internal random number generator
# @raise [ArgumentError] if `vrange` is an Array whose size is not 2, or is
#   neither an Array nor a Range
def initialize ncentrs:, dims:, vrange:, dtype:, lrate:, rseed: Random.new_seed
  @rng = Random.new rseed
  @ncentrs, @dtype, @dims, @lrate = ncentrs, dtype, dims, lrate
  bounds =
    if vrange.is_a? Array
      raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
      vrange
    elsif vrange.is_a? Range
      [vrange.first, vrange.last]
    else
      raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
    end
  @vrange = bounds.map { |bound| Float(bound) }
  # centroids are drawn last: `new_centr` reads the fields assigned above
  @centrs = ncentrs.times.map { new_centr }
end
23
+
24
# Builds one new centroid, each value drawn at random between the two
# bounds stored in `vrange`
# @return [NMatrix] matrix built with shape `dims` and type `dtype`
def new_centr
  low, high = vrange
  NMatrix.new(dims, dtype: dtype) { rng.rand(low..high) }
end
28
+
29
# Computes similarities between image and all centroids, as the dot product
# of each centroid with the image.
#
# The `parallel` gem is used to spread the work when it is installed; it is
# treated as an optional speed-up, so when it cannot be loaded the same
# values are computed sequentially instead of failing with a LoadError.
# @param img image to compare; its `shape` must have a single dimension
# @return [Array] one similarity per centroid, in centroid order
# @raise [NotImplementedError] if `img.shape` has more than one dimension
def similarities img
  raise NotImplementedError if img.shape.size > 1
  begin
    # deliberately lazy: only this method needs the gem
    require 'parallel'
  rescue LoadError
    return centrs.map { |c| c.dot(img).first }
  end
  Parallel.map(centrs) { |c| c.dot(img).first }
end
36
+ # The list of similarities also constitutes the encoding of the image
37
+ alias encode similarities
38
+
39
# Finds the centroid most similar to the image
# @return [Array] pair `[index, similarity]` for the highest similarity;
#   on ties the first such index is returned
def most_similar_centr img
  scores = similarities img
  best = scores.max
  [scores.index(best), best]
end
46
+
47
# Reconstructs the image as the centroid most similar to it
# @return the stored centroid selected by `#most_similar_centr`
def reconstruction img
  best_idx = most_similar_centr(img).first
  centrs[best_idx]
end
51
+
52
# Per-pixel reconstruction error: the reconstruction minus the image
def reconstr_error img
  rebuilt = reconstruction img
  rebuilt - img
end
56
+
57
# Trains on one image: the target centroid is replaced by the blend
# `centroid * (1 - lrate) + img * lrate`.
# @param img image to train on
# @param simils [Array, nil] optional precomputed `[index, similarity]` pair,
#   in the format returned by `#most_similar_centr` (despite the name, not
#   the full list of similarities); computed from `img` when omitted
# @return the updated centroid
def train_one img, simils: nil
  winner, = simils || most_similar_centr(img)
  blended = centrs[winner] * (1-lrate) + img * lrate
  centrs[winner] = blended
  # NOTE(review): `in_range!` is defined elsewhere; presumably it raises when
  # the centroid leaves `vrange` -- confirm
  Verification.in_range! centrs[winner], vrange
  centrs[winner]
end
64
+
65
# Trains on a list of images, one at a time and in order, so that centroids
# are updated after each image (parallel variants were considered but their
# correctness was unclear).
# @param img_lst list of images
# @param debug [Boolean] print a `.` after each image when true
# @return `img_lst`
def train img_lst, debug: false
  img_lst.each do |img|
    train_one img
    print '.' if debug
  end
end
73
+ end
74
+ end
@@ -0,0 +1,197 @@
1
+
2
+ # Monkey patches
3
+
4
+ module MachineLearningWorkbench::Monkey
5
module Dimensionable
  # Size of each nesting level, outermost first (e.g. `[[1,2,3],[4,5,6]]`
  # gives `[2, 3]`).
  # Only the first element of each level is inspected: all elements are
  # assumed to have the same size, which saves some checks.
  # @param ret [Array] accumulator the sizes are appended to
  # @return [Array<Integer>] `ret`, with the sizes appended
  def dims ret: []
    level = self
    loop do
      ret << level.size
      head = level.first
      return ret unless head.kind_of? Array
      level = head
    end
  end
end
16
+
17
module Buildable
  # Constructor wrapper adding block-based initialization: the object is
  # built by the original `new`, then (only if a block is given) every stored
  # element is overwritten with the block's result for that element's indices.
  # @yieldparam idxs the indices of one stored element
  # @return the newly built object
  def new *args
    built = super
    return built unless block_given?
    built.each_stored_with_indices do |_value, *idxs|
      built[*idxs] = yield(*idxs)
    end
    built
  end
end
28
+
29
+ module AdvancelyOperationable # how am I supposed to name these things??
30
+
31
# Generalized outer relationship between two matrices.
# The result has shape `self.shape + other.shape`: the entry at
# `[r1, c1, r2, c2]` is the block's value for the element of `self` at
# `[r1, c1]` and the element of `other` at `[r2, c2]`.
# @param other [NMatrix] other matrix
# @yieldparam v1 an element of `self`
# @yieldparam v2 an element of `other`
# @note Works only for 2D matrices (two indices are read per element).
#   It is a quick proof of concept.
# @note Output size is fixed: NMatrix cannot be composed of other NMatrices,
#   so the block cannot return matrices.
# @note Nested `map`/`collect` calls were tried and do not preserve the
#   structure, hence the explicit index loops.
# @raise [ArgumentError] if no block is given
# @return [NMatrix]
def outer other
  raise ArgumentError unless block_given?
  result = NMatrix.new(self.shape+other.shape)
  each_stored_with_indices do |mine, row, col|
    other.each_stored_with_indices do |theirs, other_row, other_col|
      result[row, col, other_row, other_col] = yield(mine, theirs)
    end
  end
  result
end
57
+
58
# Flat-output generalized outer relationship: a 2-dim matrix whose rows
# correspond to the elements of `self` and whose columns correspond to the
# elements of `other`, each entry being the block's value for that pair.
# @param other [NMatrix] other matrix
# @raise [ArgumentError] if no block is given
# @return [NMatrix] built through `self.class[]` with the `dtype` of `self`
def outer_flat other
  raise ArgumentError unless block_given?
  # NOTE(review): `collect` is used on both sides on purpose; do not swap it
  # for `map` without checking how NMatrix defines `map` -- TODO confirm
  rows = collect do |mine|
    other.collect { |theirs| yield(mine, theirs) }
  end
  self.class[*rows, dtype: dtype]
end
68
+
69
# Matrix exponential: `e^self` (not to be confused with `self^n`!)
#
# One-dimensional matrices (including single-row and single-column 2D ones)
# are exponentiated element by element. Everything else goes through the
# eigendecomposition of `#eigen_symm` (method from
# scipy/linalg/matfuncs.py#expm2): `V * diag(exp(eigenvalues)) * V^-1`.
# @note The general case was written to exponentiate the log-sigma matrix of
#   XNES: it relies on `#eigen_symm`, hence on a symmetric input.
# @todo open question from the original author: the results could not be
#   made to pass the tests by transposing (or not) the eigenvectors here.
# @return [NMatrix]
def exponential
  exp = Math.method(:exp)
  vector_like = (dim == 1) || (dim == 2 && shape.include?(1))
  return NMatrix.new(shape, collect(&exp), dtype: dtype) if vector_like

  e_values, e_vectors = eigen_symm
  exp_diag = NMatrix.diagonal e_values.collect(&exp)
  e_vectors.dot(exp_diag).dot(e_vectors.invert)
end
93
+
94
# Matrix eigenvalues and eigenvectors, delegated to `NMatrix::LAPACK.geev`
# @param which [:both, :left, :right] which eigenvectors to compute
# @return [Array<NMatrix, NMatrix[, NMatrix]>]
#   eigenvalues (as column vector), left eigenvectors, right eigenvectors.
#   A value other than `:both` for `which` reduces the size of the return.
# @raise [ArgumentError] if `which` is not one of the accepted values
# @note requires LAPACK
# @note WARNING! Complex values with a machine-precision-error imaginary
#   part are often returned! For symmetric matrices use `#eigen_symm`
def eigen which=:both
  case which
  when :both, :left, :right
    NMatrix::LAPACK.geev(self, which)
  else
    raise ArgumentError
  end
end
107
+
108
# Eigenvalues and eigenvectors for symmetric matrices using LAPACK
# @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev
# @note FOR SYMMETRIC MATRICES ONLY!! (symmetry itself is not checked)
# @note WARNING: will return real matrices, imaginary parts are discarded!
# @note NOTE(review): the call asks LAPACK for the "right" eigenvectors of
#   the transposed input and transposes the result; the original notes
#   describe the returned vectors both as right and as left eigenvectors --
#   confirm which applies
# @todo could it be possible to save some of the transpositions?
# @raise [TypeError] for complex-valued matrices
# @raise [StorageTypeError] for non-dense matrices
# @raise [ShapeError] for non-square (or non-2D) matrices
# @return [Array<NMatrix, NMatrix>] eigenvalues (n x 1) and eigenvectors
def eigen_symm
  raise TypeError, "Only real-valued matrices" if complex_dtype?
  raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense?
  square = dim == 2 && shape[0] == shape[1]
  raise ShapeError, "Only square matrices" unless square

  order = shape[0]

  # Output buffers, filled in place by LAPACK
  real_parts = NMatrix.new([order, 1], dtype: dtype)
  imag_parts = NMatrix.new([order, 1], dtype: dtype) # only to satisfy the C allocation
  vectors = clone_structure

  lapack_args = [
    false,      # compute left eigenvectors of A?
    :t,         # compute right eigenvectors of A? (left eigenvectors of A**T)
    order,      # order of the matrix
    transpose,  # input matrix => needs to be column-wise
    order,      # leading dimension of matrix
    real_parts, # real part of computed eigenvalues
    imag_parts, # imaginary part of computed eigenvalues (will be discarded)
    nil,        # left eigenvectors, if applicable
    order,      # leading dimension of left_output
    vectors,    # right eigenvectors, if applicable
    order,      # leading dimension of right_output
    2*order     # presumably LAPACK's workspace size (lwork) -- TODO confirm
  ]
  NMatrix::LAPACK.lapack_geev(*lapack_args)

  raise "Uhm why complex eigenvalues?" if imag_parts.any? { |part| part > 1e-10 }
  [real_parts, vectors.transpose]
end
146
+
147
+
148
# Array conversion with a consistent nesting depth.
# `NMatrix#to_a` converts single-row matrices to one-dimensional Arrays
# rather than to a 2D Array with only one row. Patching `#to_a` directly is
# not feasible, as the constructor seems to depend on it.
# @return [Array<Array>] a consistent array representation, such that
#   `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices
def to_consistent_a
  single_row = dim == 2 && shape[0] == 1
  plain = to_a
  single_row ? [plain] : plain
end
158
+ alias :to_ca :to_consistent_a
159
+ end
160
+
161
module NumericallyApproximatable
  # Verifies if `self` and `other` are within `epsilon` of each other,
  # i.e. if their absolute difference is strictly less than `epsilon`.
  # Used for testing, and as the element-wise check behind the matrix-level
  # `approximates?`.
  # @param other [Numeric]
  # @param epsilon [Numeric]
  # @return [Boolean]
  def approximates? other, epsilon=1e-5
    distance = (self - other).abs
    distance < epsilon
  end
end
171
+
172
module MatrixApproximatable
  # Verifies if all values at corresponding indices approximate each other.
  #
  # `epsilon` is a local, per-element tolerance: each pair of corresponding
  # values must pass `approximates?` on its own. The alternative, a single
  # tolerance on the sum of all differences, was not chosen because it is
  # sensitive to opposite signs balancing each other out.
  # @param other [NMatrix]
  # @param epsilon [Float]
  # @return [Boolean] false when the shapes differ
  def approximates? other, epsilon=1e-5
    shape == other.shape &&
      each_stored_with_indices.all? { |value, *idxs| value.approximates? other[*idxs], epsilon }
  end
end
190
+ end
191
+
192
+ Array.include MachineLearningWorkbench::Monkey::Dimensionable # adds `Array#dims`
193
+ NMatrix.extend MachineLearningWorkbench::Monkey::Buildable # class-level: `new` with block-based element initialization
194
+ require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke
195
+ NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable # adds `outer`, `outer_flat`, `exponential`, `eigen`, `eigen_symm`, `to_consistent_a`
196
+ Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable # adds `approximates?` to numbers
197
+ NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable # adds `approximates?` to matrices