lr-linear-regression 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/lr_linear_regression.rb +158 -0
  3. metadata +68 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: e1724a5b3d466f518f717d0b090ec8e25a524f1d317e9f872340bea1bd273f52
+   data.tar.gz: 8e44722f9a20bc76917c2321bc0f0189a7a29b253dced311bb1056921625e2cf
+ SHA512:
+   metadata.gz: ee2dfd794d0de362e8987f6f2442876c0c1115b46a445e2317583fbc444ace1122b9fea409fbffaea6d9ed6e52c3567e5ffd8ff5e977636fcef22c3451d0d4f8
+   data.tar.gz: 28525aa869276260b76aa13a2959c69f84bb16097e6b13e7f396de61e9377cd849df566435083c09f6345e90aa820a16b3dd5a1ba78d1d13b5860204c96ec579
data/lib/lr_linear_regression.rb ADDED
@@ -0,0 +1,158 @@
+ require 'matrix'
+
+ # RubyLinearRegression
+ class RubyLinearRegression
+
+   attr_reader :x, :y, :theta, :mu, :sigma, :lambda, :normalize
+
+   def initialize
+     @mu = 0
+     @sigma = 1
+   end
+
+   # Loads and normalizes the training data; must be called prior to training.
+   # Arguments:
+   #   x_data: (Two-dimensional array with the independent variables of your training data)
+   #   y_data: (Array with the dependent variables of your training data)
+   def load_training_data x_data, y_data, normalize = true
+
+     @normalize = normalize
+
+     # normalize the x_data
+     x_data = normalize_data( x_data ) if @normalize
+
+     # add 1 column to our data
+     x_data = x_data.map { |r| [1].concat(r) }
+
+     # build our x Matrix & y Vector
+     @x = Matrix.rows( x_data )
+     @y = Matrix.rows( y_data.collect { |e| [e] } )
+
+     @theta = Matrix.zero(@x.column_size, 1)
+   end
+
+   # Compute the mean squared cost / error function
+   def compute_cost test_x = nil, test_y = nil
+
+     unless test_x.nil?
+       test_x.each_index do |row|
+         test_x[row].each_index do |i|
+           test_x[row][i] = (test_x[row][i] - @mu[i]) / @sigma[i].to_f
+         end
+       end if @normalize
+       test_x = test_x.map { |r| [1].concat(r) }
+     end
+
+     # by default, use the training data to compute the cost if no data is given
+     cost_x = test_x.nil? ? @x : Matrix.rows( test_x )
+     cost_y = test_y.nil? ? @y : Matrix.rows( test_y.collect { |e| [e] } )
+
+     # First use matrix multiplication and vector subtraction to find the errors
+     errors = (cost_x * @theta) - cost_y
+
+     # Then square all errors
+     errors = errors.map { |e| (e.to_f**2) }
+
+     # Find the mean of the squared errors
+     mean_square_error = 0.5 * (errors.inject{ |sum, e| sum + e }.to_f / errors.row_size)
+
+     return mean_square_error
+   end
+
+   # Calculate the optimal theta using the normal equation
+   def train_normal_equation l = 0
+
+     @lambda = l
+     # identity matrix with a zero in the top-left corner,
+     # so that the bias term is not regularized
+     lambda_matrix = Matrix.build(@theta.row_size, @theta.row_size) do |r,c|
+       ((r == 0 && c == 0) || r != c) ? 0 : 1
+     end
+
+     # Calculate the optimal theta using the regularized normal equation
+     # theta = ( X' * X + lambda * L )^-1 * X' * y
+     @theta = (@x.transpose * @x + @lambda * lambda_matrix ).inverse * @x.transpose * @y
+
+     return @theta
+   end
+
+   # Calculate the optimal theta using gradient descent
+   # Arguments:
+   #   alpha: Learning rate
+   #   iterations: Number of iterations to run gradient descent
+   #   verbose: If true, outputs the cost after each iteration; useful for finding a good learning rate (alpha) and iteration count
+   def train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
+
+     1.upto( iterations ) do |i|
+       @temp_theta = Array.new(@theta.row_size)
+       0.upto(@theta.row_size-1) do |row|
+         @temp_theta[row] = @theta[row,0] - alpha * compute_gradient(row)
+       end
+
+       @theta = Matrix.columns([@temp_theta])
+
+       puts "Cost after #{i} iterations = #{compute_cost}" if verbose
+     end
+
+   end
+
+   # Makes a prediction based on your trained model.
+   # train_normal_equation or train_gradient_descent must be called prior to making a prediction.
+   #
+   # Arguments:
+   #   data: (Array of independent variables to base your prediction on)
+   def predict data
+
+     # normalize
+     data.each_index do |i|
+       data[i] = (data[i] - @mu[i]) / @sigma[i].to_f
+     end if @normalize
+
+     # add 1 column to prediction data
+     data = [1].concat( data )
+
+     # perform prediction
+     prediction = (Matrix[data] * @theta)[0,0].to_f
+
+     return prediction
+
+   end
+
+   private
+
+   # Normalizes each feature column by subtracting the column mean
+   # and dividing by the column range (max - min)
+   def normalize_data(x_data, mu = nil, sigma = nil)
+
+     row_size = x_data.size
+     column_count = x_data[0].is_a?( Array ) ? x_data[0].size : 1
+
+     x_norm = Array.new(row_size)
+     @mu = Array.new(column_count)
+     @sigma = Array.new(column_count)
+
+     0.upto(column_count - 1) do |column|
+       column_data = x_data.map{ |e| e[column] }
+       @mu[column] = column_data.inject{ |sum, e| sum + e } / row_size.to_f
+       @sigma[column] = (column_data.max - column_data.min)
+     end
+
+     0.upto(row_size-1) do |row|
+       row_data = x_data[row]
+       x_norm[row] = Array.new(column_count)
+       row_data.each_index do |i|
+         x_norm[row][i] = (row_data[i] - @mu[i]) / @sigma[i].to_f
+       end
+     end
+
+     return x_norm
+
+   end
+
+   # Compute the gradient of the cost function for a single parameter
+   def compute_gradient( parameter )
+
+     # First use matrix multiplication and vector subtraction to find the errors,
+     # then multiply by the parameter's feature column to sum the per-example gradients
+     gradients = ((@x * @theta) - @y).transpose * @x.column(parameter)
+
+     # Average the gradient over all training examples
+     mean = gradients.inject{ |sum, e| sum + e } / @x.row_size.to_f
+
+     return mean
+   end
+ end
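
For reference, here is a minimal usage sketch of the class above. It is not part of the package contents, and the training data is made up for illustration: load training data, fit theta with the normal equation, report the training cost, and make a prediction.

require 'lr_linear_regression'

# illustrative data: living area (sq ft) and room count vs. price
x_data = [[1000, 2], [1500, 3], [2000, 3], [2500, 4]]
y_data = [200_000, 270_000, 330_000, 410_000]

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)

# closed-form fit; pass a positive lambda to regularize, e.g. train_normal_equation(0.1)
model.train_normal_equation

puts model.compute_cost        # mean squared error on the training data
puts model.predict([1800, 3])  # predicted price for a new example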
metadata ADDED
@@ -0,0 +1,68 @@
+ --- !ruby/object:Gem::Specification
+ name: lr-linear-regression
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - LOUARN Ronan
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2019-01-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: minitest
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+ description: |-
+   An implementation of the linear regression machine learning algorithm in Ruby.
+   The library supports simple problems with one independent variable as well as multivariate problems with multiple independent variables used to predict a dependent variable.
+   You can train your models using the normal equation or gradient descent.
+   The library is implemented in pure Ruby using Ruby's Matrix implementation.
+ email: ronan.louarn.dev@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/lr_linear_regression.rb
+ homepage: https://github.com/
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.7.6
+ signing_key:
+ specification_version: 4
+ summary: Linear regression implemented in Ruby.
+ test_files: []
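
As the gem description notes, models can also be trained with gradient descent instead of the normal equation. A minimal sketch using the same illustrative data as above; the learning rate and iteration count here are assumptions that would need tuning per dataset:

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)

# pass verbose = true to print the cost after every iteration, which helps
# choose the learning rate (alpha) and a sufficient iteration count
model.train_gradient_descent(0.5, 1000, false)

puts model.predict([1800, 3])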