lr-linear-regression 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/lr_linear_regression.rb +158 -0
  3. metadata +68 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: e1724a5b3d466f518f717d0b090ec8e25a524f1d317e9f872340bea1bd273f52
+   data.tar.gz: 8e44722f9a20bc76917c2321bc0f0189a7a29b253dced311bb1056921625e2cf
+ SHA512:
+   metadata.gz: ee2dfd794d0de362e8987f6f2442876c0c1115b46a445e2317583fbc444ace1122b9fea409fbffaea6d9ed6e52c3567e5ffd8ff5e977636fcef22c3451d0d4f8
+   data.tar.gz: 28525aa869276260b76aa13a2959c69f84bb16097e6b13e7f396de61e9377cd849df566435083c09f6345e90aa820a16b3dd5a1ba78d1d13b5860204c96ec579
data/lib/lr_linear_regression.rb ADDED
@@ -0,0 +1,158 @@
+ require 'matrix'
+
+ # RubyLinearRegression
+ class RubyLinearRegression
+
+   attr_reader :x, :y, :theta, :mu, :sigma, :lambda, :normalize
+
+   def initialize
+     @mu = 0
+     @sigma = 1
+   end
+
+   # Loads and normalizes the training data; must be called prior to training.
+   # Arguments:
+   #   x_data: (Two-dimensional array with the independent variables of your training data)
+   #   y_data: (Array with the dependent variables of your training data)
+   def load_training_data x_data, y_data, normalize = true
+
+     @normalize = normalize
+
+     # normalize the x_data
+     x_data = normalize_data( x_data ) if @normalize
+
+     # add 1 column to our data
+     x_data = x_data.map { |r| [1].concat(r) }
+
+     # build our x Matrix & y Vector
+     @x = Matrix.rows( x_data )
+     @y = Matrix.rows( y_data.collect { |e| [e] } )
+
+     @theta = Matrix.zero(@x.column_size, 1)
+   end
+
+   # Compute the mean squared cost / error function
+   def compute_cost test_x = nil, test_y = nil
+
+     unless test_x.nil?
+       test_x.each_index do |row|
+         test_x[row].each_index do |i|
+           test_x[row][i] = (test_x[row][i] - @mu[i]) / @sigma[i].to_f
+         end
+       end if @normalize
+       test_x = test_x.map { |r| [1].concat(r) }
+     end
+
+     # by default, use the training data to compute the cost if no data is given
+     cost_x = test_x.nil? ? @x : Matrix.rows( test_x )
+     cost_y = test_y.nil? ? @y : Matrix.rows( test_y.collect { |e| [e] } )
+
+     # First use matrix multiplication and vector subtraction to find the errors
+     errors = (cost_x * @theta) - cost_y
+
+     # Then square all errors
+     errors = errors.map { |e| (e.to_f**2) }
+
+     # Find the mean of the squared errors
+     mean_square_error = 0.5 * (errors.inject{ |sum, e| sum + e }.to_f / errors.row_size)
+
+     return mean_square_error
+   end
+
+   # Calculate the optimal theta using the normal equation
+   def train_normal_equation l = 0
+
+     @lambda = l
+     # identity matrix with a zero in the top-left corner,
+     # so that the bias term is not regularized
+     lambda_matrix = Matrix.build(@theta.row_size, @theta.row_size) do |r,c|
+       ((r == 0 && c == 0) || r != c) ? 0 : 1
+     end
+
+     # Calculate the optimal theta using the regularized normal equation
+     # theta = ( X' * X + lambda * L )^-1 * X' * y
+     @theta = (@x.transpose * @x + @lambda * lambda_matrix ).inverse * @x.transpose * @y
+
+     return @theta
+   end
+
+   # Calculate the optimal theta using gradient descent
+   # Arguments:
+   #   alpha: Learning rate
+   #   iterations: Number of iterations to run gradient descent
+   #   verbose: If true, outputs the cost after each iteration; useful for finding a good learning rate (alpha) and iteration count
+   def train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
+
+     1.upto( iterations ) do |i|
+       @temp_theta = Array.new(@theta.row_size)
+       0.upto(@theta.row_size-1) do |row|
+         @temp_theta[row] = @theta[row,0] - alpha * compute_gradient(row)
+       end
+
+       @theta = Matrix.columns([@temp_theta])
+
+       puts "Cost after #{i} iterations = #{compute_cost}" if verbose
+     end
+
+   end
+
+   # Makes a prediction based on your trained model.
+   # train_normal_equation or train_gradient_descent must be called prior to making a prediction.
+   #
+   # Arguments:
+   #   data: (Array of independent variables to base your prediction on)
+   def predict data
+
+     # normalize
+     data.each_index do |i|
+       data[i] = (data[i] - @mu[i]) / @sigma[i].to_f
+     end if @normalize
+
+     # add 1 column to prediction data
+     data = [1].concat( data )
+
+     # perform prediction
+     prediction = (Matrix[data] * @theta)[0,0].to_f
+
+     return prediction
+
+   end
+
+   private
+
+   # Normalizes each feature column by subtracting the column mean
+   # and dividing by the column range (max - min)
+   def normalize_data(x_data, mu = nil, sigma = nil)
+
+     row_size = x_data.size
+     column_count = x_data[0].is_a?( Array ) ? x_data[0].size : 1
+
+     x_norm = Array.new(row_size)
+     @mu = Array.new(column_count)
+     @sigma = Array.new(column_count)
+
+     0.upto(column_count - 1) do |column|
+       column_data = x_data.map{ |e| e[column] }
+       @mu[column] = column_data.inject{ |sum, e| sum + e } / row_size.to_f
+       @sigma[column] = (column_data.max - column_data.min)
+     end
+
+     0.upto(row_size-1) do |row|
+       row_data = x_data[row]
+       x_norm[row] = Array.new(column_count)
+       row_data.each_index do |i|
+         x_norm[row][i] = (row_data[i] - @mu[i]) / @sigma[i].to_f
+       end
+     end
+
+     return x_norm
+
+   end
+
+   # Compute the gradient of the cost function for a single parameter
+   def compute_gradient( parameter )
+
+     # First use matrix multiplication and vector subtraction to find the errors,
+     # then multiply by the parameter's feature column to sum the per-example gradients
+     gradients = ((@x * @theta) - @y).transpose * @x.column(parameter)
+
+     # Average the gradient over all training examples
+     mean = gradients.inject{ |sum, e| sum + e } / @x.row_size.to_f
+
+     return mean
+   end
+ end
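
For reference, here is a minimal usage sketch of the class above. It is not part of the package contents, and the training data is made up for illustration: load training data, fit theta with the normal equation, report the training cost, and make a prediction.

require 'lr_linear_regression'

# illustrative data: living area (sq ft) and room count vs. price
x_data = [[1000, 2], [1500, 3], [2000, 3], [2500, 4]]
y_data = [200_000, 270_000, 330_000, 410_000]

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)

# closed-form fit; pass a positive lambda to regularize, e.g. train_normal_equation(0.1)
model.train_normal_equation

puts model.compute_cost        # mean squared error on the training data
puts model.predict([1800, 3])  # predicted price for a new example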
metadata ADDED
@@ -0,0 +1,68 @@
+ --- !ruby/object:Gem::Specification
+ name: lr-linear-regression
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - LOUARN Ronan
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2019-01-03 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: minitest
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.10'
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 5.10.2
+ description: |-
+   An implementation of the linear regression machine learning algorithm in Ruby.
+   The library supports simple problems with one independent variable as well as multivariate problems with multiple independent variables used to predict a dependent variable.
+   You can train your models using the normal equation or gradient descent.
+   The library is implemented in pure Ruby using Ruby's Matrix implementation.
+ email: ronan.louarn.dev@gmail.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/lr_linear_regression.rb
+ homepage: https://github.com/
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.7.6
+ signing_key:
+ specification_version: 4
+ summary: Linear regression implemented in Ruby.
+ test_files: []
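
As the gem description notes, models can also be trained with gradient descent instead of the normal equation. A minimal sketch using the same illustrative data as above; the learning rate and iteration count here are assumptions that would need tuning per dataset:

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)

# pass verbose = true to print the cost after every iteration, which helps
# choose the learning rate (alpha) and a sufficient iteration count
model.train_gradient_descent(0.5, 1000, false)

puts model.predict([1800, 3])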