lr-linear-regression 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/lr_linear_regression.rb +158 -0
- metadata +68 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: e1724a5b3d466f518f717d0b090ec8e25a524f1d317e9f872340bea1bd273f52
+  data.tar.gz: 8e44722f9a20bc76917c2321bc0f0189a7a29b253dced311bb1056921625e2cf
+SHA512:
+  metadata.gz: ee2dfd794d0de362e8987f6f2442876c0c1115b46a445e2317583fbc444ace1122b9fea409fbffaea6d9ed6e52c3567e5ffd8ff5e977636fcef22c3451d0d4f8
+  data.tar.gz: 28525aa869276260b76aa13a2959c69f84bb16097e6b13e7f396de61e9377cd849df566435083c09f6345e90aa820a16b3dd5a1ba78d1d13b5860204c96ec579
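The digests above fingerprint the metadata.gz and data.tar.gz entries packed inside the published .gem archive. As a minimal sketch of how they could be checked (assuming the two entries have already been extracted from lr-linear-regression-0.1.0.gem, for example with tar -xf), Ruby's standard Digest library can recompute and compare the SHA256 values:

require 'digest'

# Expected SHA256 digests copied from checksums.yaml
EXPECTED = {
  'metadata.gz' => 'e1724a5b3d466f518f717d0b090ec8e25a524f1d317e9f872340bea1bd273f52',
  'data.tar.gz' => '8e44722f9a20bc76917c2321bc0f0189a7a29b253dced311bb1056921625e2cf'
}

EXPECTED.each do |file, digest|
  actual = Digest::SHA256.file(file).hexdigest  # hash the extracted archive entry
  puts "#{file}: #{actual == digest ? 'OK' : 'MISMATCH'}"
end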
data/lib/lr_linear_regression.rb
ADDED
@@ -0,0 +1,158 @@
+require 'matrix'
+
+# RubyLinearRegression
+class RubyLinearRegression
+
+  attr_reader :x, :y, :theta, :mu, :sigma, :lambda, :normalize
+
+  def initialize
+    @mu = 0
+    @sigma = 1
+  end
+
+  # Loads and normalizes the training data, must be called prior to training.
+  # Arguments:
+  #   x_data: (Two dimensional array with the independent variables of your training data)
+  #   y_data: (Array with the dependent variables of your training data)
+  def load_training_data x_data, y_data, normalize = true
+
+    @normalize = normalize
+
+    # normalize the x_data
+    x_data = normalize_data( x_data ) if @normalize
+
+    # add 1 column to our data
+    x_data = x_data.map { |r| [1].concat(r) }
+
+    # build our x Matrix & y Vector
+    @x = Matrix.rows( x_data )
+    @y = Matrix.rows( y_data.collect { |e| [e] } )
+
+    @theta = Matrix.zero(@x.column_size, 1)
+  end
+
+  # Compute the mean squared cost / error function
+  def compute_cost test_x = nil, test_y = nil
+
+    if not test_x.nil?
+      test_x.each_index do |row|
+        test_x[row].each_index do |i|
+          test_x[row][i] = (test_x[row][i] - @mu[i]) / @sigma[i].to_f
+        end
+      end if @normalize
+      test_x = test_x.map { |r| [1].concat(r) }
+    end
+
+    # per default use training data to compute cost if no data is given
+    cost_x = test_x.nil? ? @x : Matrix.rows( test_x )
+    cost_y = test_y.nil? ? @y : Matrix.rows( test_y.collect { |e| [e] } )
+
+    # First use matrix multiplication and vector subtraction to find errors
+    errors = (cost_x * @theta) - cost_y
+
+    # Then square all errors
+    errors = errors.map { |e| (e.to_f**2) }
+
+    # Find the mean of the square errors
+    mean_square_error = 0.5 * (errors.inject{ |sum, e| sum + e }.to_f / errors.row_size)
+
+    return mean_square_error
+  end
+
+  # Calculate the optimal theta using the normal equation
+  def train_normal_equation l = 0
+
+    @lambda = l
+    lambda_matrix = Matrix.build(@theta.row_size, @theta.row_size) do |c, r|
+      (( c == 0 && r == 0) || c != r) ? 0 : 1
+    end
+
+    # Calculate the optimal theta using the normal equation
+    # theta = ( X' * X )^-1 * X' * y
+    @theta = (@x.transpose * @x + @lambda * lambda_matrix ).inverse * @x.transpose * @y
+
+    return @theta
+  end
+
+  # Calculate optimal theta using gradient descent
+  # Arguments:
+  #   alpha: Learning rate
+  #   iterations: Number of iterations to run gradient descent
+  #   verbose: If true, will output cost after each iteration; can be used to find an optimal learning rate (alpha) and iteration count
+  def train_gradient_descent( alpha = 0.01, iterations = 500, verbose = false )
+
+    0.upto( iterations ) do |i|
+      @temp_theta = Array.new(@theta.row_size)
+      0.upto(@theta.row_size - 1) do |row|
+        @temp_theta[row] = @theta[row, 0] - alpha * compute_gradient(row)
+      end
+
+      @theta = Matrix.columns([@temp_theta])
+
+      puts "Cost after #{i} iterations = #{compute_cost}" if verbose
+    end
+
+  end
+
+  # Makes a prediction based on your trained model.
+  # train_normal_equation must be called prior to making a prediction.
+  #
+  # Arguments:
+  #   data: (Array of independent variables to base your prediction on)
+  def predict data
+
+    # normalize
+    data.each_index do |i|
+      data[i] = (data[i] - @mu[i]) / @sigma[i].to_f
+    end if @normalize
+
+    # add 1 column to prediction data
+    data = [1].concat( data )
+
+    # perform prediction
+    prediction = (Matrix[data] * @theta)[0, 0].to_f
+
+    return prediction
+
+  end
+
+  private
+  def normalize_data(x_data, mu = nil, sigma = nil)
+
+    row_size = x_data.size
+    column_count = x_data[0].is_a?( Array ) ? x_data[0].size : 1
+
+    x_norm = Array.new(row_size)
+    @mu = Array.new(column_count)
+    @sigma = Array.new(column_count)
+
+    0.upto(column_count - 1) do |column|
+      column_data = x_data.map { |e| e[column] }
+      @mu[column] = column_data.inject{ |sum, e| sum + e } / row_size
+      @sigma[column] = (column_data.max - column_data.min)
+    end
+
+    0.upto(row_size - 1) do |row|
+      row_data = x_data[row]
+      x_norm[row] = Array.new(column_count)
+      row_data.each_index do |i|
+        x_norm[row][i] = (row_data[i] - @mu[i]) / @sigma[i].to_f
+      end
+    end
+
+    return x_norm
+
+  end
+
+  # Compute the mean gradient of the cost function for a single parameter
+  def compute_gradient( parameter )
+
+    # First use matrix multiplication and vector subtraction to find errors
+    gradients = ((@x * @theta) - @y).transpose * @x.column(parameter)
+
+    # Mean the gradient
+    mean = gradients.inject{ |sum, e| sum + e } / gradients.size
+
+    return mean
+  end
+end
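Taken together, the file above exposes a small supervised-learning workflow: load_training_data scales the features and adds the bias column, train_normal_equation solves the (optionally L2-regularized) closed form theta = (X'X + lambda * L)^-1 * X'y, where L is an identity matrix with the bias entry zeroed, and predict applies the learned theta to a new example. A minimal usage sketch follows; the feature values are made up for illustration and are not part of the package:

require 'lr_linear_regression'

# Toy data: [size_m2, rooms] per example, with the price as the target
x_data = [[89, 3], [66, 2], [120, 4], [45, 1]]
y_data = [250_000, 180_000, 340_000, 120_000]

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)   # feature scaling is on by default

model.train_normal_equation                # pass a lambda, e.g. train_normal_equation(0.1), to regularize
puts "Training cost: #{model.compute_cost}"
puts "Predicted price for [80, 2]: #{model.predict([80, 2])}"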
metadata
ADDED
@@ -0,0 +1,68 @@
+--- !ruby/object:Gem::Specification
+name: lr-linear-regression
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- LOUARN Ronan
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2019-01-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: minitest
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.10'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 5.10.2
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '5.10'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 5.10.2
+description: |-
+  An implementation of a linear regression machine learning algorithm implemented in Ruby.
+  The library supports simple problems with one independent variable used to predict a dependent variable as well as multivariate problems with multiple independent variables to predict a dependent variable.
+  You can train your algorithms using the normal equation or gradient descent.
+  The library is implemented in pure ruby using Ruby's Matrix implementation.
+email: ronan.louarn.dev@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/lr_linear_regression.rb
+homepage: https://github.com/
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.7.6
+signing_key:
+specification_version: 4
+summary: Linear regression implemented in Ruby.
+test_files: []
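The gem description notes that models can be fit either with the normal equation or with gradient descent. Below is a sketch of the gradient-descent path; the learning rate, iteration count, and data are illustrative guesses, not recommendations shipped with the gem:

require 'lr_linear_regression'

# Single-feature toy data
x_data = [[1.0], [2.0], [3.0], [4.0]]
y_data = [2.0, 4.1, 5.9, 8.2]

model = RubyLinearRegression.new
model.load_training_data(x_data, y_data)

# alpha = 0.05 over 500 iterations; pass true as the third argument to print the cost each iteration
model.train_gradient_descent(0.05, 500, false)
puts model.predict([2.5])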