machine_learning_workbench 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +15 -0
  3. data/.gitignore +11 -0
  4. data/.rspec +3 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +70 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +37 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/lib/machine_learning_workbench.rb +19 -0
  14. data/lib/machine_learning_workbench/compressor.rb +1 -0
  15. data/lib/machine_learning_workbench/compressor/vector_quantization.rb +74 -0
  16. data/lib/machine_learning_workbench/monkey.rb +197 -0
  17. data/lib/machine_learning_workbench/neural_network.rb +3 -0
  18. data/lib/machine_learning_workbench/neural_network/base.rb +211 -0
  19. data/lib/machine_learning_workbench/neural_network/feed_forward.rb +20 -0
  20. data/lib/machine_learning_workbench/neural_network/recurrent.rb +35 -0
  21. data/lib/machine_learning_workbench/optimizer.rb +7 -0
  22. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +112 -0
  23. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb +104 -0
  24. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb +40 -0
  25. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb +46 -0
  26. data/lib/machine_learning_workbench/tools.rb +4 -0
  27. data/lib/machine_learning_workbench/tools/execution.rb +18 -0
  28. data/lib/machine_learning_workbench/tools/imaging.rb +48 -0
  29. data/lib/machine_learning_workbench/tools/normalization.rb +22 -0
  30. data/lib/machine_learning_workbench/tools/verification.rb +11 -0
  31. data/machine_learning_workbench.gemspec +36 -0
  32. metadata +216 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f8075c35b1a57c76c34e17ea94d9a2ff95bb5c1
4
+ data.tar.gz: c89aca6335ae3d3b15b08d8c0b0cb06f348d78cd
5
+ SHA512:
6
+ metadata.gz: 7b6f1245dc746fe149cbf25a66f486482454a4a0b4e756cc0bbf66e7a65e90158cdeb054696311f267cf1054dabbe9fabc51ce2a6fffb76eb61d5febf51ec723
7
+ data.tar.gz: c111a7b0ada4aa24c3ad996f23791c28e1a37c1be4978ceecd694c07d4199fc3c9ec7a9ac7f4839eb1e81f84ef805d76df90038a9bd3f22e6510812086fa867e
data/.codeclimate.yml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ engines:
3
+ rubocop:
4
+ enabled: true
5
+ duplication:
6
+ enabled: true
7
+ config:
8
+ languages:
9
+ - ruby
10
+ ratings:
11
+ paths:
12
+ - lib/**
13
+ - "**.rb"
14
+ exclude_paths:
15
+ - spec/**
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0
data/Gemfile ADDED
@@ -0,0 +1,6 @@
source "https://rubygems.org"

git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# All of the gem's dependencies are declared in machine_learning_workbench.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ machine_learning_workbench (0.0.0)
5
+ nmatrix-atlas (~> 0.2)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ backports (3.11.1)
11
+ binding_of_caller (0.8.0)
12
+ debug_inspector (>= 0.0.1)
13
+ coderay (1.1.2)
14
+ debug_inspector (0.0.3)
15
+ diff-lcs (1.3)
16
+ interception (0.5)
17
+ method_source (0.8.2)
18
+ nmatrix (0.2.4)
19
+ packable (~> 1.3, >= 1.3.5)
20
+ nmatrix-atlas (0.2.4)
21
+ nmatrix (= 0.2.4)
22
+ packable (1.3.9)
23
+ backports
24
+ parallel (1.12.1)
25
+ pry (0.10.4)
26
+ coderay (~> 1.1.0)
27
+ method_source (~> 0.8.1)
28
+ slop (~> 3.4)
29
+ pry-nav (0.2.4)
30
+ pry (>= 0.9.10, < 0.11.0)
31
+ pry-rescue (1.4.5)
32
+ interception (>= 0.5)
33
+ pry
34
+ pry-stack_explorer (0.4.9.2)
35
+ binding_of_caller (>= 0.7)
36
+ pry (>= 0.9.11)
37
+ rake (10.5.0)
38
+ rmagick (2.16.0)
39
+ rspec (3.7.0)
40
+ rspec-core (~> 3.7.0)
41
+ rspec-expectations (~> 3.7.0)
42
+ rspec-mocks (~> 3.7.0)
43
+ rspec-core (3.7.1)
44
+ rspec-support (~> 3.7.0)
45
+ rspec-expectations (3.7.0)
46
+ diff-lcs (>= 1.2.0, < 2.0)
47
+ rspec-support (~> 3.7.0)
48
+ rspec-mocks (3.7.0)
49
+ diff-lcs (>= 1.2.0, < 2.0)
50
+ rspec-support (~> 3.7.0)
51
+ rspec-support (3.7.1)
52
+ slop (3.6.0)
53
+
54
+ PLATFORMS
55
+ ruby
56
+
57
+ DEPENDENCIES
58
+ bundler (~> 1.16)
59
+ machine_learning_workbench!
60
+ parallel
61
+ pry (~> 0.10)
62
+ pry-nav (~> 0.2)
63
+ pry-rescue (~> 1.4)
64
+ pry-stack_explorer (~> 0.4)
65
+ rake (~> 10.0)
66
+ rmagick
67
+ rspec (~> 3.0)
68
+
69
+ BUNDLED WITH
70
+ 1.16.1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Giuseppe Cuccu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # Machine Learning Workbench
2
+
3
+ This workbench holds a collection of machine learning methods in Ruby. Rather than specializing on a single task or method, this gem aims at providing an encompassing framework for any machine learning application.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'machine_learning_workbench'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install machine_learning_workbench
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Development
26
+
27
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
28
+
29
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at https://github.com/giuse/machine_learning_workbench.
34
+
35
+ ## License
36
+
37
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
# Rake entry point: defines the gem packaging tasks and makes
# the RSpec suite the default task (plain `rake` runs the tests).
require "bundler/gem_tasks"
require "rspec/core/rake_task"

RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby

# Interactive console for experimenting with the workbench.
require "bundler/setup"
require "machine_learning_workbench"

# Fixtures / initialization code for experiments can go here.
# A different console can be swapped in below if preferred.

# Pry offers a richer REPL than IRB (remember to add pry to the Gemfile!)
require "pry"
Pry.start

# Plain-IRB alternative:
# require "irb"
# IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Project bootstrap: installs the gem's dependencies.
set -euo pipefail   # abort on errors, unset variables, and pipeline failures
IFS=$'\n\t'         # safer word splitting (no space-splitting surprises)
set -vx             # echo each command as it runs

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,19 @@
# Entry point for the machine_learning_workbench gem.
# Declares the top-level namespaces up front, then loads each component.
require 'nmatrix'

module MachineLearningWorkbench
  module Compressor       # compression / encoding methods (vector quantization)
  end
  module NeuralNetwork    # network implementations (feed-forward, recurrent)
  end
  module Optimizer        # black-box optimizers (natural evolution strategies)
  end
  module Tools            # support utilities (imaging, normalization, verification)
  end
end

# NOTE: `monkey` must load first: it patches NMatrix/Array/Numeric with
# helpers the rest of the library depends on; `tools` loads before
# `compressor`, which references MachineLearningWorkbench::Tools::Verification.
require_relative 'machine_learning_workbench/monkey'
require_relative 'machine_learning_workbench/tools'
require_relative 'machine_learning_workbench/compressor'
require_relative 'machine_learning_workbench/neural_network'
require_relative 'machine_learning_workbench/optimizer'
@@ -0,0 +1 @@
# Compressor namespace loader: pulls in every available compressor.
require_relative 'compressor/vector_quantization'
@@ -0,0 +1,74 @@
begin
  # `parallel` speeds up #similarities but is only a development dependency
  # (it is not declared in the gemspec): load it once here, at file load
  # time rather than on every call, and degrade gracefully when missing.
  require 'parallel'
rescue LoadError
  nil
end

module MachineLearningWorkbench::Compressor
  # Vector quantization compressor: maintains a codebook of `ncentrs`
  # centroids and encodes an image as its similarity to each of them.
  class VectorQuantization
    attr_reader :ncentrs, :centrs, :dims, :vrange, :dtype, :lrate, :rng
    Verification = MachineLearningWorkbench::Tools::Verification

    # @param ncentrs [Integer] number of centroids in the codebook
    # @param dims [Array<Integer>] shape of each centroid (NMatrix shape)
    # @param vrange [Array(Numeric, Numeric), Range] admissible value range
    # @param dtype [Symbol] NMatrix dtype for the centroids
    # @param lrate [Numeric] learning rate for centroid updates
    # @param rseed [Integer] RNG seed, for reproducible initialization
    # @raise [ArgumentError] if `vrange` is neither a 2-element Array nor a Range
    def initialize ncentrs:, dims:, vrange:, dtype:, lrate:, rseed: Random.new_seed
      @rng = Random.new rseed
      @ncentrs = ncentrs
      @dtype = dtype
      @dims = dims
      @lrate = lrate
      @vrange = case vrange
        when Array
          raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
          vrange.map(&method(:Float))
        when Range
          [vrange.first, vrange.last].map(&method(:Float))
        else
          raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
      end
      @centrs = ncentrs.times.map { new_centr }
    end

    # Creates a new (random) centroid, values uniformly drawn from `vrange`
    # @return [NMatrix]
    def new_centr
      NMatrix.new(dims, dtype: dtype) { rng.rand Range.new(*vrange) }
    end

    # Computes similarities (dot products) between image and all centroids.
    # Uses `parallel` when available, serial map otherwise.
    # @param img [NMatrix] one-dimensional image vector
    # @return [Array<Numeric>] one similarity per centroid
    def similarities img
      raise NotImplementedError if img.shape.size > 1
      if defined?(Parallel)
        Parallel.map(centrs) { |c| c.dot(img).first }
      else
        centrs.map { |c| c.dot(img).first }
      end
    end
    # The list of similarities also constitutes the encoding of the image
    alias encode similarities

    # Returns index and similitude of most similar centroid to image
    # @return [Array(Integer, Numeric)] `[index, similarity]`
    def most_similar_centr img
      simils = similarities img
      max_simil = simils.max
      max_idx = simils.index max_simil
      [max_idx, max_simil]
    end

    # Reconstruct image as its most similar centroid
    # @return [NMatrix]
    def reconstruction img
      centrs[most_similar_centr(img).first]
    end

    # Per-pixel errors in reconstructing image
    # @return [NMatrix]
    def reconstr_error img
      reconstruction(img) - img
    end

    # Train on one image: blend the most similar centroid towards it by `lrate`
    # @param simils [Array(Integer, Numeric), nil] precomputed
    #   `[index, similarity]` pair as returned by #most_similar_centr
    #   (NOT the full similarity list, despite the parameter name)
    # @return [NMatrix] the updated centroid
    def train_one img, simils: nil
      trg_idx, _simil = simils || most_similar_centr(img)
      centrs[trg_idx] = centrs[trg_idx] * (1 - lrate) + img * lrate
      # updated centroid must stay within the declared value range
      Verification.in_range! centrs[trg_idx], vrange
      centrs[trg_idx]
    end

    # Train on image list
    # @param img_lst [Array<NMatrix>] training images
    # @param debug [Boolean] print a progress dot per image
    def train img_lst, debug: false
      # Two ways here:
      # - Batch: canonical, centrs updated with each img
      # - Parallel: could be parallel either on simils or on training (?)
      # Unsure on the correctness of either Parallel, let's stick with Batch
      img_lst.each { |img| train_one img; print '.' if debug }
    end
  end
end
@@ -0,0 +1,197 @@

# Monkey patches

module MachineLearningWorkbench::Monkey
  # Mixed into Array: recursively measures the size of each nesting level.
  module Dimensionable
    # @param ret [Array] accumulator of sizes found so far
    # @return [Array<Integer>] size of each nesting level, outermost first
    # @note assumes all elements at one level share the same size (unchecked)
    def dims ret: []
      ret << size
      if first.kind_of? Array
        # hypothesize all elements having same size and save some checks
        first.dims ret: ret
      else
        ret
      end
    end
  end

  # Extended onto NMatrix: lets `NMatrix.new` take a block, seeding each
  # stored element with the value yielded for its indices.
  module Buildable
    def new *args
      super.tap do |m|
        if block_given?
          m.each_stored_with_indices do |_,*idxs|
            # parens around the splat avoid the ambiguous-argument warning
            m[*idxs] = yield(*idxs)
          end
        end
      end
    end
  end

  module AdvancelyOperationable # how am I supposed to name these things??

    # Outer matrix relationship generalization.
    # Make a matrix the same shape as `self`; each element is a matrix,
    # with the same shape as `other`, resulting from the interaction of
    # the corresponding element in `self` and all the elements in `other`.
    # @param other [NMatrix] other matrix
    # @note This implementation works only for 2D matrices (same as most
    #   other methods here). It's a quick hack, a proof of concept barely
    #   sufficient for my urgent needs.
    # @note Output size is fixed! Since NMatrix does not graciously yield to
    #   being composed of other NMatrices (by adapting the shape of the root
    #   matrix), the block cannot return matrices in there.
    # @raise [ArgumentError] if no block is given
    # @return [NMatrix]
    def outer other
      # NOTE: Map of map in NMatrix does not work as expected!
      # self.map { |v1| other.map { |v2| yield(v1,v2) } }
      # NOTE: this doesn't cut it either... can't capture the structure
      # NMatrix[ *self.collect { |v1| other.collect { |v2| yield(v1,v2) } } ]
      raise ArgumentError unless block_given?
      NMatrix.new(self.shape+other.shape).tap do |m|
        each_stored_with_indices do |v1,r1,c1|
          other.each_stored_with_indices do |v2,r2,c2|
            m[r1,c1,r2,c2] = yield(v1,v2)
          end
        end
      end
    end

    # Flat-output generalized outer relationship. Same as `#outer`, but the
    # result is a 2-dim matrix of the interactions between all the elements
    # in `self` (as rows) and all the elements in `other` (as columns)
    # @param other [NMatrix] other matrix
    # @raise [ArgumentError] if no block is given
    # @return [NMatrix]
    def outer_flat other
      raise ArgumentError unless block_given?
      data = collect { |v1| other.collect { |v2| yield(v1, v2) } }
      self.class[*data, dtype: dtype]
    end

    # Matrix exponential: `e^self` (not to be confused with `self^n`!)
    # @return [NMatrix]
    def exponential
      # special case: one-dimensional matrix: just exponentiate the values
      if (dim == 1) || (dim == 2 && shape.include?(1))
        return NMatrix.new shape, collect(&Math.method(:exp)), dtype: dtype
      end

      # Eigenvalue decomposition method from scipy/linalg/matfuncs.py#expm2

      # TODO: find out why can't I get away without double transpose!
      e_values, e_vectors = eigen_symm

      e_vals_exp_dmat = NMatrix.diagonal e_values.collect(&Math.method(:exp))
      # ASSUMING WE'RE ONLY USING THIS TO EXPONENTIATE LOG_SIGMA IN XNES
      # Theoretically we need the right eigenvectors, which for a symmetric
      # matrix should be just transposes of the eigenvectors.
      # But we have a positive definite matrix, so the final composition
      # below holds without transposing
      # BUT, strangely, I can't seem to get eigen_symm to green the tests
      # ...with or without transpose
      # e_vectors = e_vectors.transpose
      e_vectors.dot(e_vals_exp_dmat).dot(e_vectors.invert)#.transpose
    end

    # Calculate matrix eigenvalues and eigenvectors using LAPACK
    # @param which [:both, :left, :right] which eigenvectors do you want?
    # @return [Array<NMatrix, NMatrix[, NMatrix]>]
    #   eigenvalues (as column vector), left eigenvectors, right eigenvectors.
    #   A value different than `:both` for param `which` reduces the return size.
    # @note requires LAPACK
    # @note WARNING! a param `which` different than :both alters the returns
    # @note WARNING! machine-precision-error imaginary part Complex
    #   often returned! For symmetric matrices use #eigen_symm below
    def eigen which=:both
      raise ArgumentError unless [:both, :left, :right].include? which
      NMatrix::LAPACK.geev(self, which)
    end

    # Eigenvalues and right eigenvectors for symmetric matrices using LAPACK
    # @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev
    # @note FOR SYMMETRIC MATRICES ONLY!!
    # @note WARNING: will return real matrices, imaginary parts are discarded!
    # @note WARNING: only left eigenvectors will be returned!
    # @todo could it be possible to save some of the transpositions?
    # @return [Array<NMatrix, NMatrix>] eigenvalues and (left) eigenvectors
    def eigen_symm
      # TODO: check for symmetry if not too slow
      raise TypeError, "Only real-valued matrices" if complex_dtype?
      raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense?
      raise ShapeError, "Only square matrices" unless dim == 2 && shape[0] == shape[1]

      n = shape[0]

      # Outputs
      e_values = NMatrix.new([n, 1], dtype: dtype)
      e_values_img = NMatrix.new([n, 1], dtype: dtype) # to satisfy C alloc
      e_vectors = clone_structure

      NMatrix::LAPACK::lapack_geev(
        false,        # compute left eigenvectors of A?
        :t,           # compute right eigenvectors of A? (left eigenvectors of A**T)
        n,            # order of the matrix
        transpose,    # input matrix => needs to be column-wise # self,
        n,            # leading dimension of matrix
        e_values,     # real part of computed eigenvalues
        e_values_img, # imaginary part of computed eigenvalues (will be discarded)
        nil,          # left eigenvectors, if applicable
        n,            # leading dimension of left_output
        e_vectors,    # right eigenvectors, if applicable
        n,            # leading dimension of right_output
        2*n           # no clue what's this
      )

      # BUGFIX: compare the magnitude — the original `v > 1e-10` let
      # negative imaginary parts slip through unnoticed
      raise "Uhm why complex eigenvalues?" if e_values_img.any? { |v| v.abs > 1e-10 }
      [e_values, e_vectors.transpose]
    end


    # `NMatrix#to_a` has inconsistent behavior: single-row matrices are
    # converted to one-dimensional Arrays rather than a 2D Array with
    # only one row. Patching `#to_a` directly is not feasible as the
    # constructor seems to depend on it, and I have little interest in
    # investigating further.
    # @return [Array<Array>] a consistent array representation, such that
    #   `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices
    def to_consistent_a
      dim == 2 && shape[0] == 1 ? [to_a] : to_a
    end
    alias :to_ca :to_consistent_a
  end

  module NumericallyApproximatable
    # Verifies if `self` and `other` are within `epsilon` of each other.
    # @param other [Numeric]
    # @param epsilon [Numeric]
    # @return [Boolean]
    def approximates? other, epsilon=1e-5
      # Used for testing and NMatrix#approximates?, should I move to spec_helper?
      (self - other).abs < epsilon
    end
  end

  module MatrixApproximatable
    # Verifies if all values at corresponding indices approximate each other.
    # @param other [NMatrix]
    # @param epsilon [Float]
    def approximates? other, epsilon=1e-5
      return false unless self.shape == other.shape
      # two ways to go here:
      # - epsilon is aggregated: total cumulative accepted error
      #   => `(self - other).reduce(:+) < epsilon`
      # - epsilon is local: per element accepted error
      #   => `v.approximates? other[*idxs], epsilon`
      # The implementation below uses the SECOND (per-element) interpretation
      # (the original comment claimed the first — corrected to match the code).
      # Note the aggregated one is sensitive to opposite signs balancing up.
      self.each_stored_with_indices.all? do |v,*idxs|
        v.approximates? other[*idxs], epsilon
      end
    end
  end
end

Array.include MachineLearningWorkbench::Monkey::Dimensionable
NMatrix.extend MachineLearningWorkbench::Monkey::Buildable
require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke
NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable
Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable
NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable