lr-linear-regression 0.1.0

Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/lr_linear_regression.rb +158 -0
  3. metadata +68 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: e1724a5b3d466f518f717d0b090ec8e25a524f1d317e9f872340bea1bd273f52
+   data.tar.gz: 8e44722f9a20bc76917c2321bc0f0189a7a29b253dced311bb1056921625e2cf
+ SHA512:
+   metadata.gz: ee2dfd794d0de362e8987f6f2442876c0c1115b46a445e2317583fbc444ace1122b9fea409fbffaea6d9ed6e52c3567e5ffd8ff5e977636fcef22c3451d0d4f8
+   data.tar.gz: 28525aa869276260b76aa13a2959c69f84bb16097e6b13e7f396de61e9377cd849df566435083c09f6345e90aa820a16b3dd5a1ba78d1d13b5860204c96ec579
data/lib/lr_linear_regression.rb ADDED
@@ -0,0 +1,158 @@
+ require 'matrix'
+
+ # RubyLinearRegression implements linear regression on top of Ruby's
+ # standard library Matrix class.
+ class RubyLinearRegression
+
+   attr_reader :x, :y, :theta, :mu, :sigma, :lambda, :normalize
+
+   def initialize
+     @mu = 0
+     @sigma = 1
+   end
+
+   # Loads and normalizes the training data; must be called prior to training.
+   # Arguments:
+   #   x_data: Two-dimensional array with the independent variables of your training data
+   #   y_data: Array with the dependent variables of your training data
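+   # Example (illustrative values, not part of the gem):
+   #   x_data = [[1200, 3], [1500, 4]]   # one row of features per example
+   #   y_data = [245_000, 312_000]       # one target value per example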
+   def load_training_data x_data, y_data, normalize = true
+
+     @normalize = normalize
+
+     # normalize the x_data
+     x_data = normalize_data( x_data ) if @normalize
+
+     # prepend a column of ones to our data for the intercept term
+     x_data = x_data.map { |r| [1].concat(r) }
+
+     # build our x Matrix & y Vector
+     @x = Matrix.rows( x_data )
+     @y = Matrix.rows( y_data.collect { |e| [e] } )
+
+     @theta = Matrix.zero(@x.column_size, 1)
+   end
+
+   # Compute the mean squared cost / error function.
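+   # Computes J(theta) = 1/(2m) * sum((X * theta - y)^2),
+   # where m is the number of examples the cost is evaluated on.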
+   def compute_cost test_x = nil, test_y = nil
+
+     unless test_x.nil?
+       # scale the test data with the mu / sigma learned from the training data
+       test_x.each_index do |row|
+         test_x[row].each_index do |i|
+           test_x[row][i] = (test_x[row][i] - @mu[i]) / @sigma[i].to_f
+         end
+       end if @normalize
+       test_x = test_x.map { |r| [1].concat(r) }
+     end
+
+     # by default use the training data to compute the cost if no data is given
+     cost_x = test_x.nil? ? @x : Matrix.rows( test_x )
+     cost_y = test_y.nil? ? @y : Matrix.rows( test_y.collect { |e| [e] } )
+
+     # first use matrix multiplication and vector subtraction to find the errors
+     errors = (cost_x * @theta) - cost_y
+
+     # then square all errors
+     errors = errors.map { |e| (e.to_f**2) }
+
+     # finally take half the mean of the squared errors
+     mean_square_error = 0.5 * (errors.inject{ |sum, e| sum + e }.to_f / errors.row_size)
+
+     return mean_square_error
+   end
+
+   # Calculate the optimal theta using the regularized normal equation.
+   # Arguments:
+   #   l: Regularization strength lambda (0 disables regularization)
+   def train_normal_equation l = 0
+
+     @lambda = l
+     # build an identity matrix with a 0 in the top-left corner,
+     # so the intercept term is not regularized
+     lambda_matrix = Matrix.build(@theta.row_size,@theta.row_size) do |r,c|
+       (( r == 0 && c == 0) || r != c) ? 0 : 1
+     end
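+     # e.g. for three parameters lambda_matrix is
+     #   [[0, 0, 0],
+     #    [0, 1, 0],
+     #    [0, 0, 1]]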
+
+     # Calculate the optimal theta using the normal equation:
+     #   theta = ( X' * X + lambda * L )^-1 * X' * y
+     @theta = (@x.transpose * @x + @lambda * lambda_matrix ).inverse * @x.transpose * @y
+
+     return @theta
+   end
+
+   # Calculate the optimal theta using gradient descent.
+   # Arguments:
+   #   alpha: Learning rate
+   #   iterations: Number of iterations to run gradient descent
+   #   verbose: If true, outputs the cost after each iteration; useful for
+   #            tuning the learning rate (alpha) and the iteration count
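+   #
+   # Each iteration applies the update theta[j] := theta[j] - alpha * dJ/dtheta[j]
+   # simultaneously for every parameter j.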
+   def train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
+
+     1.upto( iterations ) do |i|
+       # compute the updated value for every parameter before applying any of
+       # them, so each update step uses a consistent theta
+       temp_theta = Array.new(@theta.row_size)
+       0.upto(@theta.row_size - 1) do |row|
+         temp_theta[row] = @theta[row,0] - alpha * compute_gradient(row)
+       end
+
+       @theta = Matrix.columns([temp_theta])
+
+       puts "Cost after #{i} iterations = #{compute_cost}" if verbose
+     end
+
+   end
+
+   # Makes a prediction based on your trained model.
+   # The model must be trained (train_normal_equation or train_gradient_descent)
+   # prior to making a prediction.
+   #
+   # Arguments:
+   #   data: Array of independent variables to base your prediction on
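+   #
+   # Example (illustrative values): model.predict([1400, 3])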
+   def predict data
+
+     # scale the input with the mu / sigma learned from the training data
+     data.each_index do |i|
+       data[i] = (data[i] - @mu[i]) / @sigma[i].to_f
+     end if @normalize
+
+     # prepend the intercept column to the prediction data
+     data = [1].concat( data )
+
+     # perform the prediction
+     prediction = (Matrix[data] * @theta)[0,0].to_f
+
+     return prediction
+
+   end
+
+   private
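+
+   # Mean-normalizes each feature column: x_norm = (x - mean) / (max - min),
+   # which scales every feature into roughly the range (-1, 1).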
+   def normalize_data(x_data, mu = nil, sigma = nil)
+
+     row_size = x_data.size
+     column_count = x_data[0].is_a?( Array ) ? x_data[0].size : 1
+
+     x_norm = Array.new(row_size)
+     @mu = Array.new(column_count)
+     @sigma = Array.new(column_count)
+
+     0.upto(column_count - 1) do |column|
+       column_data = x_data.map{ |e| e[column] }
+       @mu[column] = column_data.inject{ |sum, e| sum + e } / row_size.to_f
+       # the range of the column is used as the scaling factor
+       @sigma[column] = (column_data.max - column_data.min)
+     end
+
+     0.upto(row_size - 1) do |row|
+       row_data = x_data[row]
+       x_norm[row] = Array.new(column_count)
+       row_data.each_index do |i|
+         x_norm[row][i] = (row_data[i] - @mu[i]) / @sigma[i].to_f
+       end
+     end
+
+     return x_norm
+
+   end
+
+   # Compute the gradient of the cost function with respect to a single parameter.
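+   # dJ/dtheta[j] = (1/m) * sum(((X * theta) - y)[i] * X[i, j]) over all examples i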
+   def compute_gradient( parameter )
+
+     # first use matrix multiplication and vector subtraction to find the errors,
+     # then multiply by the parameter's input column to sum over all examples
+     summed_gradient = (((@x * @theta) - @y).transpose * @x.column(parameter))[0]
+
+     # average the gradient over the number of training examples
+     return summed_gradient / @x.row_size.to_f
+   end
+ end
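
For orientation, a minimal usage sketch of the class above (the data values are illustrative, not part of the gem):

  require 'lr_linear_regression'

  # illustrative training data: two features per example
  x_data = [[1200, 3], [1500, 4], [850, 2], [2100, 5]]
  y_data = [245_000, 312_000, 180_000, 405_000]

  model = RubyLinearRegression.new
  model.load_training_data(x_data, y_data)

  # train with the closed-form normal equation...
  model.train_normal_equation

  # ...or, alternatively, with gradient descent:
  # model.train_gradient_descent(0.01, 500, true)

  puts model.predict([1400, 3])   # predicted target for a new example
  puts model.compute_cost         # half the mean squared error on the training data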
metadata ADDED
@@ -0,0 +1,68 @@
+ --- !ruby/object:Gem::Specification
+ name: lr-linear-regression
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - LOUARN Ronan
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2019-01-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: minitest
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+ description: |-
+   An implementation of the linear regression machine learning algorithm, written in Ruby.
+   The library supports simple problems with a single independent variable used to predict a dependent variable, as well as multivariate problems with multiple independent variables.
+   You can train your model using the normal equation or gradient descent.
+   The library is implemented in pure Ruby using Ruby's Matrix class.
+ email: ronan.louarn.dev@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/lr_linear_regression.rb
+ homepage: https://github.com/
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.7.6
+ signing_key:
+ specification_version: 4
+ summary: Linear regression implemented in Ruby.
+ test_files: []