machine_learning_workbench 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. checksums.yaml +7 -0
  2. data/.codeclimate.yml +15 -0
  3. data/.gitignore +11 -0
  4. data/.rspec +3 -0
  5. data/.travis.yml +5 -0
  6. data/Gemfile +6 -0
  7. data/Gemfile.lock +70 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +37 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +8 -0
  13. data/lib/machine_learning_workbench.rb +19 -0
  14. data/lib/machine_learning_workbench/compressor.rb +1 -0
  15. data/lib/machine_learning_workbench/compressor/vector_quantization.rb +74 -0
  16. data/lib/machine_learning_workbench/monkey.rb +197 -0
  17. data/lib/machine_learning_workbench/neural_network.rb +3 -0
  18. data/lib/machine_learning_workbench/neural_network/base.rb +211 -0
  19. data/lib/machine_learning_workbench/neural_network/feed_forward.rb +20 -0
  20. data/lib/machine_learning_workbench/neural_network/recurrent.rb +35 -0
  21. data/lib/machine_learning_workbench/optimizer.rb +7 -0
  22. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/base.rb +112 -0
  23. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/bdnes.rb +104 -0
  24. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/snes.rb +40 -0
  25. data/lib/machine_learning_workbench/optimizer/natural_evolution_strategies/xnes.rb +46 -0
  26. data/lib/machine_learning_workbench/tools.rb +4 -0
  27. data/lib/machine_learning_workbench/tools/execution.rb +18 -0
  28. data/lib/machine_learning_workbench/tools/imaging.rb +48 -0
  29. data/lib/machine_learning_workbench/tools/normalization.rb +22 -0
  30. data/lib/machine_learning_workbench/tools/verification.rb +11 -0
  31. data/machine_learning_workbench.gemspec +36 -0
  32. metadata +216 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f8075c35b1a57c76c34e17ea94d9a2ff95bb5c1
4
+ data.tar.gz: c89aca6335ae3d3b15b08d8c0b0cb06f348d78cd
5
+ SHA512:
6
+ metadata.gz: 7b6f1245dc746fe149cbf25a66f486482454a4a0b4e756cc0bbf66e7a65e90158cdeb054696311f267cf1054dabbe9fabc51ce2a6fffb76eb61d5febf51ec723
7
+ data.tar.gz: c111a7b0ada4aa24c3ad996f23791c28e1a37c1be4978ceecd694c07d4199fc3c9ec7a9ac7f4839eb1e81f84ef805d76df90038a9bd3f22e6510812086fa867e
data/.codeclimate.yml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ engines:
3
+ rubocop:
4
+ enabled: true
5
+ duplication:
6
+ enabled: true
7
+ config:
8
+ languages:
9
+ - ruby
10
+ ratings:
11
+ paths:
12
+ - lib/**
13
+ - "**.rb"
14
+ exclude_paths:
15
+ - spec/**
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.2
5
+ before_install: gem install bundler -v 1.16.0
data/Gemfile ADDED
@@ -0,0 +1,6 @@
source "https://rubygems.org"

git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }

# All of the gem's dependencies are declared in machine_learning_workbench.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,70 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ machine_learning_workbench (0.0.0)
5
+ nmatrix-atlas (~> 0.2)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ backports (3.11.1)
11
+ binding_of_caller (0.8.0)
12
+ debug_inspector (>= 0.0.1)
13
+ coderay (1.1.2)
14
+ debug_inspector (0.0.3)
15
+ diff-lcs (1.3)
16
+ interception (0.5)
17
+ method_source (0.8.2)
18
+ nmatrix (0.2.4)
19
+ packable (~> 1.3, >= 1.3.5)
20
+ nmatrix-atlas (0.2.4)
21
+ nmatrix (= 0.2.4)
22
+ packable (1.3.9)
23
+ backports
24
+ parallel (1.12.1)
25
+ pry (0.10.4)
26
+ coderay (~> 1.1.0)
27
+ method_source (~> 0.8.1)
28
+ slop (~> 3.4)
29
+ pry-nav (0.2.4)
30
+ pry (>= 0.9.10, < 0.11.0)
31
+ pry-rescue (1.4.5)
32
+ interception (>= 0.5)
33
+ pry
34
+ pry-stack_explorer (0.4.9.2)
35
+ binding_of_caller (>= 0.7)
36
+ pry (>= 0.9.11)
37
+ rake (10.5.0)
38
+ rmagick (2.16.0)
39
+ rspec (3.7.0)
40
+ rspec-core (~> 3.7.0)
41
+ rspec-expectations (~> 3.7.0)
42
+ rspec-mocks (~> 3.7.0)
43
+ rspec-core (3.7.1)
44
+ rspec-support (~> 3.7.0)
45
+ rspec-expectations (3.7.0)
46
+ diff-lcs (>= 1.2.0, < 2.0)
47
+ rspec-support (~> 3.7.0)
48
+ rspec-mocks (3.7.0)
49
+ diff-lcs (>= 1.2.0, < 2.0)
50
+ rspec-support (~> 3.7.0)
51
+ rspec-support (3.7.1)
52
+ slop (3.6.0)
53
+
54
+ PLATFORMS
55
+ ruby
56
+
57
+ DEPENDENCIES
58
+ bundler (~> 1.16)
59
+ machine_learning_workbench!
60
+ parallel
61
+ pry (~> 0.10)
62
+ pry-nav (~> 0.2)
63
+ pry-rescue (~> 1.4)
64
+ pry-stack_explorer (~> 0.4)
65
+ rake (~> 10.0)
66
+ rmagick
67
+ rspec (~> 3.0)
68
+
69
+ BUNDLED WITH
70
+ 1.16.1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Giuseppe Cuccu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # Machine Learning Workbench
2
+
3
+ This workbench holds a collection of machine learning methods in Ruby. Rather than specializing on a single task or method, this gem aims at providing an encompassing framework for any machine learning application.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'machine_learning_workbench'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install machine_learning_workbench
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Development
26
+
27
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
28
+
29
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at https://github.com/giuse/machine_learning_workbench.
34
+
35
+ ## License
36
+
37
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
# Rake entry point: defines the gem packaging tasks and makes
# the RSpec suite the default task (plain `rake` runs the tests).
require "bundler/gem_tasks"
require "rspec/core/rake_task"

RSpec::Core::RakeTask.new(:spec)

task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby

# Interactive console for experimenting with the workbench.
require "bundler/setup"
require "machine_learning_workbench"

# Fixtures / initialization code for experiments can go here.
# A different console can be swapped in below if preferred.

# Pry offers a richer REPL than IRB (remember to add pry to the Gemfile!)
require "pry"
Pry.start

# Plain-IRB alternative:
# require "irb"
# IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Project bootstrap: installs the gem's dependencies.
set -euo pipefail   # abort on errors, unset variables, and pipeline failures
IFS=$'\n\t'         # safer word splitting (no space-splitting surprises)
set -vx             # echo each command as it runs

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,19 @@
# Entry point for the machine_learning_workbench gem.
# Declares the top-level namespaces up front, then loads each component.
require 'nmatrix'

module MachineLearningWorkbench
  module Compressor       # compression / encoding methods (vector quantization)
  end
  module NeuralNetwork    # network implementations (feed-forward, recurrent)
  end
  module Optimizer        # black-box optimizers (natural evolution strategies)
  end
  module Tools            # support utilities (imaging, normalization, verification)
  end
end

# NOTE: `monkey` must load first: it patches NMatrix/Array/Numeric with
# helpers the rest of the library depends on; `tools` loads before
# `compressor`, which references MachineLearningWorkbench::Tools::Verification.
require_relative 'machine_learning_workbench/monkey'
require_relative 'machine_learning_workbench/tools'
require_relative 'machine_learning_workbench/compressor'
require_relative 'machine_learning_workbench/neural_network'
require_relative 'machine_learning_workbench/optimizer'
@@ -0,0 +1 @@
# Compressor namespace loader: pulls in every available compressor.
require_relative 'compressor/vector_quantization'
@@ -0,0 +1,74 @@
begin
  # `parallel` speeds up #similarities but is only a development dependency
  # (it is not declared in the gemspec): load it once here, at file load
  # time rather than on every call, and degrade gracefully when missing.
  require 'parallel'
rescue LoadError
  nil
end

module MachineLearningWorkbench::Compressor
  # Vector quantization compressor: maintains a codebook of `ncentrs`
  # centroids and encodes an image as its similarity to each of them.
  class VectorQuantization
    attr_reader :ncentrs, :centrs, :dims, :vrange, :dtype, :lrate, :rng
    Verification = MachineLearningWorkbench::Tools::Verification

    # @param ncentrs [Integer] number of centroids in the codebook
    # @param dims [Array<Integer>] shape of each centroid (NMatrix shape)
    # @param vrange [Array(Numeric, Numeric), Range] admissible value range
    # @param dtype [Symbol] NMatrix dtype for the centroids
    # @param lrate [Numeric] learning rate for centroid updates
    # @param rseed [Integer] RNG seed, for reproducible initialization
    # @raise [ArgumentError] if `vrange` is neither a 2-element Array nor a Range
    def initialize ncentrs:, dims:, vrange:, dtype:, lrate:, rseed: Random.new_seed
      @rng = Random.new rseed
      @ncentrs = ncentrs
      @dtype = dtype
      @dims = dims
      @lrate = lrate
      @vrange = case vrange
        when Array
          raise ArgumentError, "vrange size not 2: #{vrange}" unless vrange.size == 2
          vrange.map(&method(:Float))
        when Range
          [vrange.first, vrange.last].map(&method(:Float))
        else
          raise ArgumentError, "vrange: unrecognized type: #{vrange.class}"
      end
      @centrs = ncentrs.times.map { new_centr }
    end

    # Creates a new (random) centroid, values uniformly drawn from `vrange`
    # @return [NMatrix]
    def new_centr
      NMatrix.new(dims, dtype: dtype) { rng.rand Range.new(*vrange) }
    end

    # Computes similarities (dot products) between image and all centroids.
    # Uses `parallel` when available, serial map otherwise.
    # @param img [NMatrix] one-dimensional image vector
    # @return [Array<Numeric>] one similarity per centroid
    def similarities img
      raise NotImplementedError if img.shape.size > 1
      if defined?(Parallel)
        Parallel.map(centrs) { |c| c.dot(img).first }
      else
        centrs.map { |c| c.dot(img).first }
      end
    end
    # The list of similarities also constitutes the encoding of the image
    alias encode similarities

    # Returns index and similitude of most similar centroid to image
    # @return [Array(Integer, Numeric)] `[index, similarity]`
    def most_similar_centr img
      simils = similarities img
      max_simil = simils.max
      max_idx = simils.index max_simil
      [max_idx, max_simil]
    end

    # Reconstruct image as its most similar centroid
    # @return [NMatrix]
    def reconstruction img
      centrs[most_similar_centr(img).first]
    end

    # Per-pixel errors in reconstructing image
    # @return [NMatrix]
    def reconstr_error img
      reconstruction(img) - img
    end

    # Train on one image: blend the most similar centroid towards it by `lrate`
    # @param simils [Array(Integer, Numeric), nil] precomputed
    #   `[index, similarity]` pair as returned by #most_similar_centr
    #   (NOT the full similarity list, despite the parameter name)
    # @return [NMatrix] the updated centroid
    def train_one img, simils: nil
      trg_idx, _simil = simils || most_similar_centr(img)
      centrs[trg_idx] = centrs[trg_idx] * (1 - lrate) + img * lrate
      # updated centroid must stay within the declared value range
      Verification.in_range! centrs[trg_idx], vrange
      centrs[trg_idx]
    end

    # Train on image list
    # @param img_lst [Array<NMatrix>] training images
    # @param debug [Boolean] print a progress dot per image
    def train img_lst, debug: false
      # Two ways here:
      # - Batch: canonical, centrs updated with each img
      # - Parallel: could be parallel either on simils or on training (?)
      # Unsure on the correctness of either Parallel, let's stick with Batch
      img_lst.each { |img| train_one img; print '.' if debug }
    end
  end
end
@@ -0,0 +1,197 @@

# Monkey patches

module MachineLearningWorkbench::Monkey
  # Mixed into Array: recursively measures the size of each nesting level.
  module Dimensionable
    # @param ret [Array] accumulator of sizes found so far
    # @return [Array<Integer>] size of each nesting level, outermost first
    # @note assumes all elements at one level share the same size (unchecked)
    def dims ret: []
      ret << size
      if first.kind_of? Array
        # hypothesize all elements having same size and save some checks
        first.dims ret: ret
      else
        ret
      end
    end
  end

  # Extended onto NMatrix: lets `NMatrix.new` take a block, seeding each
  # stored element with the value yielded for its indices.
  module Buildable
    def new *args
      super.tap do |m|
        if block_given?
          m.each_stored_with_indices do |_,*idxs|
            # parens around the splat avoid the ambiguous-argument warning
            m[*idxs] = yield(*idxs)
          end
        end
      end
    end
  end

  module AdvancelyOperationable # how am I supposed to name these things??

    # Outer matrix relationship generalization.
    # Make a matrix the same shape as `self`; each element is a matrix,
    # with the same shape as `other`, resulting from the interaction of
    # the corresponding element in `self` and all the elements in `other`.
    # @param other [NMatrix] other matrix
    # @note This implementation works only for 2D matrices (same as most
    #   other methods here). It's a quick hack, a proof of concept barely
    #   sufficient for my urgent needs.
    # @note Output size is fixed! Since NMatrix does not graciously yield to
    #   being composed of other NMatrices (by adapting the shape of the root
    #   matrix), the block cannot return matrices in there.
    # @raise [ArgumentError] if no block is given
    # @return [NMatrix]
    def outer other
      # NOTE: Map of map in NMatrix does not work as expected!
      # self.map { |v1| other.map { |v2| yield(v1,v2) } }
      # NOTE: this doesn't cut it either... can't capture the structure
      # NMatrix[ *self.collect { |v1| other.collect { |v2| yield(v1,v2) } } ]
      raise ArgumentError unless block_given?
      NMatrix.new(self.shape+other.shape).tap do |m|
        each_stored_with_indices do |v1,r1,c1|
          other.each_stored_with_indices do |v2,r2,c2|
            m[r1,c1,r2,c2] = yield(v1,v2)
          end
        end
      end
    end

    # Flat-output generalized outer relationship. Same as `#outer`, but the
    # result is a 2-dim matrix of the interactions between all the elements
    # in `self` (as rows) and all the elements in `other` (as columns)
    # @param other [NMatrix] other matrix
    # @raise [ArgumentError] if no block is given
    # @return [NMatrix]
    def outer_flat other
      raise ArgumentError unless block_given?
      data = collect { |v1| other.collect { |v2| yield(v1, v2) } }
      self.class[*data, dtype: dtype]
    end

    # Matrix exponential: `e^self` (not to be confused with `self^n`!)
    # @return [NMatrix]
    def exponential
      # special case: one-dimensional matrix: just exponentiate the values
      if (dim == 1) || (dim == 2 && shape.include?(1))
        return NMatrix.new shape, collect(&Math.method(:exp)), dtype: dtype
      end

      # Eigenvalue decomposition method from scipy/linalg/matfuncs.py#expm2

      # TODO: find out why can't I get away without double transpose!
      e_values, e_vectors = eigen_symm

      e_vals_exp_dmat = NMatrix.diagonal e_values.collect(&Math.method(:exp))
      # ASSUMING WE'RE ONLY USING THIS TO EXPONENTIATE LOG_SIGMA IN XNES
      # Theoretically we need the right eigenvectors, which for a symmetric
      # matrix should be just transposes of the eigenvectors.
      # But we have a positive definite matrix, so the final composition
      # below holds without transposing
      # BUT, strangely, I can't seem to get eigen_symm to green the tests
      # ...with or without transpose
      # e_vectors = e_vectors.transpose
      e_vectors.dot(e_vals_exp_dmat).dot(e_vectors.invert)#.transpose
    end

    # Calculate matrix eigenvalues and eigenvectors using LAPACK
    # @param which [:both, :left, :right] which eigenvectors do you want?
    # @return [Array<NMatrix, NMatrix[, NMatrix]>]
    #   eigenvalues (as column vector), left eigenvectors, right eigenvectors.
    #   A value different than `:both` for param `which` reduces the return size.
    # @note requires LAPACK
    # @note WARNING! a param `which` different than :both alters the returns
    # @note WARNING! machine-precision-error imaginary part Complex
    #   often returned! For symmetric matrices use #eigen_symm below
    def eigen which=:both
      raise ArgumentError unless [:both, :left, :right].include? which
      NMatrix::LAPACK.geev(self, which)
    end

    # Eigenvalues and right eigenvectors for symmetric matrices using LAPACK
    # @note code taken from gem `nmatrix-atlas` NMatrix::LAPACK#geev
    # @note FOR SYMMETRIC MATRICES ONLY!!
    # @note WARNING: will return real matrices, imaginary parts are discarded!
    # @note WARNING: only left eigenvectors will be returned!
    # @todo could it be possible to save some of the transpositions?
    # @return [Array<NMatrix, NMatrix>] eigenvalues and (left) eigenvectors
    def eigen_symm
      # TODO: check for symmetry if not too slow
      raise TypeError, "Only real-valued matrices" if complex_dtype?
      raise StorageTypeError, "Only dense matrices (because LAPACK)" unless dense?
      raise ShapeError, "Only square matrices" unless dim == 2 && shape[0] == shape[1]

      n = shape[0]

      # Outputs
      e_values = NMatrix.new([n, 1], dtype: dtype)
      e_values_img = NMatrix.new([n, 1], dtype: dtype) # to satisfy C alloc
      e_vectors = clone_structure

      NMatrix::LAPACK::lapack_geev(
        false,        # compute left eigenvectors of A?
        :t,           # compute right eigenvectors of A? (left eigenvectors of A**T)
        n,            # order of the matrix
        transpose,    # input matrix => needs to be column-wise # self,
        n,            # leading dimension of matrix
        e_values,     # real part of computed eigenvalues
        e_values_img, # imaginary part of computed eigenvalues (will be discarded)
        nil,          # left eigenvectors, if applicable
        n,            # leading dimension of left_output
        e_vectors,    # right eigenvectors, if applicable
        n,            # leading dimension of right_output
        2*n           # no clue what's this
      )

      # BUGFIX: compare the magnitude — the original `v > 1e-10` let
      # negative imaginary parts slip through unnoticed
      raise "Uhm why complex eigenvalues?" if e_values_img.any? { |v| v.abs > 1e-10 }
      [e_values, e_vectors.transpose]
    end


    # `NMatrix#to_a` has inconsistent behavior: single-row matrices are
    # converted to one-dimensional Arrays rather than a 2D Array with
    # only one row. Patching `#to_a` directly is not feasible as the
    # constructor seems to depend on it, and I have little interest in
    # investigating further.
    # @return [Array<Array>] a consistent array representation, such that
    #   `nmat.to_consistent_a.to_nm == nmat` holds for single-row matrices
    def to_consistent_a
      dim == 2 && shape[0] == 1 ? [to_a] : to_a
    end
    alias :to_ca :to_consistent_a
  end

  module NumericallyApproximatable
    # Verifies if `self` and `other` are within `epsilon` of each other.
    # @param other [Numeric]
    # @param epsilon [Numeric]
    # @return [Boolean]
    def approximates? other, epsilon=1e-5
      # Used for testing and NMatrix#approximates?, should I move to spec_helper?
      (self - other).abs < epsilon
    end
  end

  module MatrixApproximatable
    # Verifies if all values at corresponding indices approximate each other.
    # @param other [NMatrix]
    # @param epsilon [Float]
    def approximates? other, epsilon=1e-5
      return false unless self.shape == other.shape
      # two ways to go here:
      # - epsilon is aggregated: total cumulative accepted error
      #   => `(self - other).reduce(:+) < epsilon`
      # - epsilon is local: per element accepted error
      #   => `v.approximates? other[*idxs], epsilon`
      # The implementation below uses the SECOND (per-element) interpretation
      # (the original comment claimed the first — corrected to match the code).
      # Note the aggregated one is sensitive to opposite signs balancing up.
      self.each_stored_with_indices.all? do |v,*idxs|
        v.approximates? other[*idxs], epsilon
      end
    end
  end
end

Array.include MachineLearningWorkbench::Monkey::Dimensionable
NMatrix.extend MachineLearningWorkbench::Monkey::Buildable
require 'nmatrix/lapack_plugin' # loads whichever is installed between atlas and lapacke
NMatrix.include MachineLearningWorkbench::Monkey::AdvancelyOperationable
Numeric.include MachineLearningWorkbench::Monkey::NumericallyApproximatable
NMatrix.include MachineLearningWorkbench::Monkey::MatrixApproximatable