RubyGems - machine_learning_workbench - Versions diffs - 0.1.0 - Mend

machine_learning_workbench 0.1.0

Files changed (32) hide show

checksums.yaml +7 -0
data/.codeclimate.yml +15 -0
data/.gitignore +11 -0
data/.rspec +3 -0
data/.travis.yml +5 -0
data/Gemfile +6 -0
data/Gemfile.lock +70 -0
data/LICENSE.txt +21 -0
data/README.md +37 -0
data/Rakefile +6 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/lib/machine_learning_workbench.rb +19 -0
data/lib/machine_learning_workbench/compressor.rb +1 -0
data/lib/machine_learning_workbench/compressor/vector_quantization.rb +74 -0
data/lib/machine_learning_workbench/monkey.rb +197 -0
data/lib/machine_learning_workbench/neural_network.rb +3 -0
data/lib/machine_learning_workbench/neural_network/base.rb +211 -0
data/lib/machine_learning_workbench/neural_network/feed_forward.rb +20 -0
data/lib/machine_learning_workbench/neural_network/recurrent.rb +35 -0
data/lib/machine_learning_workbench/optimizer.rb +7 -0
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +112 -0
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb +104 -0
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb +40 -0
data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb +46 -0
data/lib/machine_learning_workbench/tools.rb +4 -0
data/lib/machine_learning_workbench/tools/execution.rb +18 -0
data/lib/machine_learning_workbench/tools/imaging.rb +48 -0
data/lib/machine_learning_workbench/tools/normalization.rb +22 -0
data/lib/machine_learning_workbench/tools/verification.rb +11 -0
data/machine_learning_workbench.gemspec +36 -0
metadata +216 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 7f8075c35b1a57c76c34e17ea94d9a2ff95bb5c1
+  data.tar.gz: c89aca6335ae3d3b15b08d8c0b0cb06f348d78cd
+SHA512:
+  metadata.gz: 7b6f1245dc746fe149cbf25a66f486482454a4a0b4e756cc0bbf66e7a65e90158cdeb054696311f267cf1054dabbe9fabc51ce2a6fffb76eb61d5febf51ec723
+  data.tar.gz: c111a7b0ada4aa24c3ad996f23791c28e1a37c1be4978ceecd694c07d4199fc3c9ec7a9ac7f4839eb1e81f84ef805d76df90038a9bd3f22e6510812086fa867e

data/.codeclimate.yml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+engines:
+  rubocop:
+    enabled: true
+  duplication:
+    enabled: true
+    config:
+      languages:
+      - ruby
+ratings:
+  paths:
+  - lib/**
+  - "**.rb"
+exclude_paths:
+  - spec/**

data/.gitignore ADDED Viewed

@@ -0,0 +1,11 @@
+/.bundle/
+/.yardoc
+/_yardoc/
+/coverage/
+/doc/
+/pkg/
+/spec/reports/
+/tmp/
+# rspec failure tracking
+.rspec_status

data/.rspec ADDED Viewed

@@ -0,0 +1,3 @@
+--format documentation
+--color
+--require spec_helper

data/.travis.yml ADDED Viewed

@@ -0,0 +1,5 @@
+sudo: false
+language: ruby
+rvm:
+  - 2.4.2
+before_install: gem install bundler -v 1.16.0

data/Gemfile ADDED Viewed

@@ -0,0 +1,6 @@
+source "https://rubygems.org"
+git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
+# Specify your gem's dependencies in machine_learning_workbench.gemspec
+gemspec

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,70 @@
+PATH
+  remote: .
+  specs:
+    machine_learning_workbench (0.0.0)
+      nmatrix-atlas (~> 0.2)
+GEM
+  remote: https://rubygems.org/
+  specs:
+    backports (3.11.1)
+    binding_of_caller (0.8.0)
+      debug_inspector (>= 0.0.1)
+    coderay (1.1.2)
+    debug_inspector (0.0.3)
+    diff-lcs (1.3)
+    interception (0.5)
+    method_source (0.8.2)
+    nmatrix (0.2.4)
+      packable (~> 1.3, >= 1.3.5)
+    nmatrix-atlas (0.2.4)
+      nmatrix (= 0.2.4)
+    packable (1.3.9)
+      backports
+    parallel (1.12.1)
+    pry (0.10.4)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    pry-nav (0.2.4)
+      pry (>= 0.9.10, < 0.11.0)
+    pry-rescue (1.4.5)
+      interception (>= 0.5)
+      pry
+    pry-stack_explorer (0.4.9.2)
+      binding_of_caller (>= 0.7)
+      pry (>= 0.9.11)
+    rake (10.5.0)
+    rmagick (2.16.0)
+    rspec (3.7.0)
+      rspec-core (~> 3.7.0)
+      rspec-expectations (~> 3.7.0)
+      rspec-mocks (~> 3.7.0)
+    rspec-core (3.7.1)
+      rspec-support (~> 3.7.0)
+    rspec-expectations (3.7.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.7.0)
+    rspec-mocks (3.7.0)
+      diff-lcs (>= 1.2.0, < 2.0)
+      rspec-support (~> 3.7.0)
+    rspec-support (3.7.1)
+    slop (3.6.0)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  bundler (~> 1.16)
+  machine_learning_workbench!
+  parallel
+  pry (~> 0.10)
+  pry-nav (~> 0.2)
+  pry-rescue (~> 1.4)
+  pry-stack_explorer (~> 0.4)
+  rake (~> 10.0)
+  rmagick
+  rspec (~> 3.0)
+BUNDLED WITH
+   1.16.1

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,21 @@
+The MIT License (MIT)
+Copyright (c) 2018 Giuseppe Cuccu
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,37 @@
+# Machine Learning Workbench
+This workbench holds a collection of machine learning methods in Ruby. Rather than specializing in a single task or method, this gem aims to provide an encompassing framework for any machine learning application.
+## Installation
+Add this line to your application's Gemfile:
+```ruby
+gem 'machine_learning_workbench'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install machine_learning_workbench
+## Usage
+TODO: Write usage instructions here
+## Development
+After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
+To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
+## Contributing
+Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/machine_learning_workbench.
+## License
+The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).

data/Rakefile ADDED Viewed

@@ -0,0 +1,6 @@
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+RSpec::Core::RakeTask.new(:spec)
+task :default => :spec

data/bin/console ADDED Viewed

@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+require "bundler/setup"
+require "machine_learning_workbench"
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+# (If you use this, don't forget to add pry to your Gemfile!)
+require "pry"
+Pry.start
+# require "irb"
+# IRB.start(__FILE__)

data/bin/setup ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+bundle install
+# Do any other automated setup that you need to do here

data/lib/machine_learning_workbench.rb ADDED Viewed

@@ -0,0 +1,19 @@
+require 'nmatrix'
+module MachineLearningWorkbench
+  module Compressor
+  end
+  module NeuralNetwork
+  end
+  module Optimizer
+  end
+  end
+  module Tools
+  end
+end
+require_relative 'machine_learning_workbench/monkey'
+require_relative 'machine_learning_workbench/tools'
+require_relative 'machine_learning_workbench/compressor'
+require_relative 'machine_learning_workbench/neural_network'
+require_relative 'machine_learning_workbench/optimizer'

data/lib/machine_learning_workbench/compressor.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require_relative 'compressor/vector_quantization'

data/lib/machine_learning_workbench/compressor/vector_quantization.rb ADDED Viewed

@@ -0,0 +1,74 @@
+module MachineLearningWorkbench::Compressor
+  class VectorQuantization
+    attr_reader :ncentrs, :centrs, :dims, :vrange, :dtype, :lrate, :rng
+    Verification = MachineLearningWorkbench::Tools::Verification
+    def initialize ncentrs:, dims:, vrange:, dtype:, lrate:, rseed: Random.new_seed
+      @rng = Random.new rseed
+      @ncentrs = ncentrs
+      @dtype = dtype
+      @dims = dims
+      @lrate = lrate
+      @vrange = case vrange
+      when Array
+        raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
+        vrange.map &method(:Float)
+      when Range
+        [vrange.first, vrange.last].map &method(:Float)
+      else
+        raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
+      end
+      @centrs = ncentrs.times.map { new_centr }
+    end
+    # Creates a new (random) centroid
+    def new_centr
+      NMatrix.new(dims, dtype: dtype) { rng.rand Range.new *vrange }
+    end
+    # Computes similarities between image and all centroids
+    def similarities img
+      raise NotImplementedError if img.shape.size > 1
+      # centrs.map { |c| c.dot(img).first }
+      require 'parallel'
+      Parallel.map(centrs) { |c| c.dot(img).first }
+    end
+    # The list of similarities also constitutes the encoding of the image
+    alias encode similarities
+    # Returns index and similitude of most similar centroid to image
+    def most_similar_centr img
+      simils = similarities img
+      max_simil = simils.max
+      max_idx = simils.index max_simil
+      [max_idx, max_simil]
+    end
+    # Reconstruct image as its most similar centroid
+    def reconstruction img
+      centrs[most_similar_centr(img).first]
+    end
+    # Per-pixel errors in reconstructing image
+    def reconstr_error img
+      reconstruction(img) - img
+    end
+    # Train on one image
+    def train_one img, simils: nil
+      trg_idx, _simil = simils || most_similar_centr(img)
+      centrs[trg_idx] = centrs[trg_idx] * (1-lrate) + img * lrate
+      Verification.in_range! centrs[trg_idx], vrange
+      centrs[trg_idx]
+    end
+    # Train on image list
+    def train img_lst, debug: false
+      # Two ways here:
+      # - Batch: canonical, centrs updated with each img
+      # - Parallel: could be parallel either on simils or on training (?)
+      # Unsure on the correctness of either Parallel, let's stick with Batch
+      img_lst.each { |img| train_one img; print '.' if debug }
+    end
+  end
+end

data/lib/machine_learning_workbench/monkey.rb ADDED Viewed

@@ -0,0 +1,197 @@
+# Monkey patches
+module MachineLearningWorkbench::Monkey
+  module Dimensionable
+    def dims ret: []
+      ret << size
+      if first.kind_of? Array
+        # hypothesize all elements having same size and save some checks
+        first.dims ret: ret
+      else
+        ret
+      end
+    end
+  end
+  module Buildable
+    def new *args
+      super.tap do |m|
+        if block_given?
+          m.each_stored_with_indices do |_,*idxs|
+            m[*idxs] = yield *idxs
+          end
+        end
+      end
+    end
+  end
+  # Additional matrix operations: generalized outer relationships, matrix
+  # exponential, eigen-decompositions through LAPACK, and a consistent
+  # conversion to Array.
+  module AdvancelyOperationable # how am I supposed to name these things??
+    # Outer matrix relationship generalization.
+    # Make a matrix the same shape as `self`; each element is a matrix,
+    # with the same shape as `other`, resulting from the interaction of
+    # the corresponding element in `self` and all the elements in `other`.
+    # @param other [NMatrix] other matrix
+    # @yield [v1, v2] an element of `self` and an element of `other`
+    # @raise [ArgumentError] if no block is given
+    # @note This implementation works only for 2D matrices (same as most
+    #   other methods here). It's a quick hack, a proof of concept barely
+    #   sufficient for my urgent needs.
+    # @note Output size is fixed! Since NMatrix does not graciously yield to
+    #   being composed of other NMatrices (by adapting the shape of the root
+    #   matrix), the block cannot return matrices in there.
+    # @return [NMatrix]
+    def outer other
+      # NOTE: Map of map in NMatrix does not work as expected!
+      # self.map { |v1| other.map { |v2| yield(v1,v2) } }
+      # NOTE: this doesn't cut it either... can't capture the structure
+      # NMatrix[ *self.collect { |v1| other.collect { |v2| yield(v1,v2) } } ]
+      raise ArgumentError unless block_given?
+      NMatrix.new(self.shape+other.shape).tap do |m|
+        each_stored_with_indices do |v1,r1,c1|
+          other.each_stored_with_indices do |v2,r2,c2|
+            m[r1,c1,r2,c2] = yield(v1,v2)
+          end
+        end
+      end
+    end
+    # Flat-output generalized outer relationship. Same as `#outer`, but the
+    # result is a 2-dim matrix of the interactions between all the elements
+    # in `self` (as rows) and all the elements in `other` (as columns)
+    # @param other [NMatrix] other matrix
+    # @yield [v1, v2] an element of `self` and an element of `other`
+    # @raise [ArgumentError] if no block is given
+    # @return [NMatrix]
+    def outer_flat other
+      raise ArgumentError unless block_given?
+      data = collect { |v1| other.collect { |v2| yield(v1, v2) } }
+      self.class[*data, dtype: dtype]
+    end
+    # Matrix exponential: `e^self` (not to be confused with `self^n`!)
+    # @return [NMatrix]
+    def exponential
+      # special case: one-dimensional matrix: just exponentiate the values
+      if (dim == 1) || (dim == 2 && shape.include?(1))
+        return NMatrix.new shape, collect(&Math.method(:exp)), dtype: dtype
+      end
+      # Eigenvalue decomposition method from scipy/linalg/matfuncs.py#expm2
+      # TODO: find out why can't I get away without double transpose!
+      e_values, e_vectors = eigen_symm
+      e_vals_exp_dmat = NMatrix.diagonal e_values.collect(&Math.method(:exp))
+      # ASSUMING WE'RE ONLY USING THIS TO EXPONENTIATE LOG_SIGMA IN XNES
+      # Theoretically we need the right eigenvectors, which for a symmetric
+      # matrix should be just transposes of the eigenvectors.
+      # But we have a positive definite matrix, so the final composition
+      # below holds without transposing
+      # BUT, strangely, I can't seem to get eigen_symm to green the tests
+      # ...with or without transpose
+      # e_vectors = e_vectors.transpose
+      e_vectors.dot(e_vals_exp_dmat).dot(e_vectors.invert)#.transpose
+    end
+    # Calculate matrix eigenvalues and eigenvectors using LAPACK
+    # @param which [:both, :left, :right] which eigenvectors do you want?
+    # @raise [ArgumentError] if `which` is not one of the accepted values
+    # @return [Array<NMatrix, NMatrix[, NMatrix]>]
+    #   eigenvalues (as column vector), left eigenvectors, right eigenvectors.
+    #   A value different than `:both` for param `which` reduces the return size.
+    # @note requires LAPACK
+    # @note WARNING! a param `which` different than :both alters the returns
+    # @note WARNING! machine-precision-error imaginary part Complex
+    #   often returned! For symmetric matrices use #eigen_symm below
+    def eigen which=:both
+      raise ArgumentError unless [:both, :left, :right].include? which
+      NMatrix::LAPACK.geev(self, which)
+    end
+    # Eigenvalues and right eigenvectors for symmetric matrices using LAPACK
+    # @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev
+    # @note FOR SYMMETRIC MATRICES ONLY!!
+    # @note WARNING: will return real matrices, imaginary parts are discarded!
+    # @note WARNING: only left eigenvectors will be returned!
+    # @note NOTE(review): the summary says "right" eigenvectors while the
+    #   warning and `@return` say "left"; the call below requests the right
+    #   eigenvectors of the transposed input -- confirm the intended wording
+    # @todo could it be possible to save some of the transpositions?
+    # @raise [TypeError] if the matrix has a complex dtype
+    # @raise [StorageTypeError] if the matrix is not dense
+    # @raise [ShapeError] if the matrix is not 2-dimensional and square
+    # @raise [RuntimeError] if an imaginary part larger than 1e-10 is computed
+    # @return [Array<NMatrix, NMatrix>] eigenvalues and (left) eigenvectors
+    def eigen_symm
+      # TODO: check for symmetry if not too slow
+      raise TypeError, "Only real-valued matrices" if complex_dtype?
+      raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense?
+      raise ShapeError, "Only square matrices" unless dim == 2 && shape[0] == shape[1]
+      n = shape[0]
+      # Outputs
+      e_values = NMatrix.new([n, 1], dtype: dtype)
+      e_values_img = NMatrix.new([n, 1], dtype: dtype) # to satisfy C alloc
+      e_vectors = clone_structure
+      NMatrix::LAPACK::lapack_geev(
+        false,        # compute left eigenvectors of A?
+        :t,           # compute right eigenvectors of A? (left eigenvectors of A**T)
+        n,            # order of the matrix
+        transpose,    # input matrix => needs to be column-wise  # self,
+        n,            # leading dimension of matrix
+        e_values,     # real part of computed eigenvalues
+        e_values_img, # imaginary part of computed eigenvalues (will be discarded)
+        nil,          # left eigenvectors, if applicable
+        n,            # leading dimension of left_output
+        e_vectors,    # right eigenvectors, if applicable
+        n,            # leading dimension of right_output
+        2*n           # presumably the LAPACK workspace size (lwork) -- TODO confirm
+      )
+      raise "Uhm why complex eigenvalues?" if e_values_img.any? {|v| v>1e-10}
+      return [e_values, e_vectors.transpose]
+    end
+    # `NMatrix#to_a` has inconsistent behavior: single-row matrices are
+    # converted to one-dimensional Arrays rather than a 2D Array with
+    # only one row. Patching `#to_a` directly is not feasible as the
+    # constructor seems to depend on it, and I have little interest in
+    # investigating further.
+    # @return [Array<Array>] a consistent array representation, such that
+    #   `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices
+    def to_consistent_a
+      dim == 2 && shape[0] == 1 ? [to_a] : to_a
+    end
+    alias :to_ca :to_consistent_a
+  end
+  module NumericallyApproximatable
+    # Verifies if `self` and `other` are withing `epsilon` of each other.
+    # @param other [Numeric]
+    # @param epsilon [Numeric]
+    # @return [Boolean]
+    def approximates? other, epsilon=1e-5
+      # Used for testing and NMatrix#approximates?, should I move to spec_helper?
+      (self - other).abs < epsilon
+    end
+  end
+  module MatrixApproximatable
+    # Verifies if all values at corresponding indices approximate each other.
+    # @param other [NMatrix]
+    # @param epsilon [Float]
+    def approximates? other, epsilon=1e-5
+      return false unless self.shape == other.shape
+      # two ways to go here:
+      # - epsilon is aggregated: total cumulative accepted error
+      #   => `(self - other).reduce(:+) < epsilon`
+      # - epsilon is local: per element accepted error
+      #   => `v.approximates? other[*idxs], epsilon`
+      # Given the use I make (near-equality), I choose the first interpretation
+      # Note the second is sensitive to opposite signs balancing up
+      self.each_stored_with_indices.all? do |v,*idxs|
+        v.approximates? other[*idxs], epsilon
+      end
+    end
+  end
+end
+# Install the patches defined above into the target classes
+Array.include MachineLearningWorkbench::Monkey::Dimensionable # adds Array#dims
+NMatrix.extend MachineLearningWorkbench::Monkey::Buildable # block form of NMatrix.new
+require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke
+NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable # outer, exponential, eigen...
+Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable # Numeric#approximates?
+NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable # NMatrix#approximates?