linear-regressions 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c64615109379ef7ead3598aa35d3428d7d4dd8b7
4
+ data.tar.gz: 63f04cf0baeb243c3f76661392c9a9b24a816da9
5
+ SHA512:
6
+ metadata.gz: 4274651628d3d38bf5607404f577496de0ecb64fff3c8c23bf082f384ee32dc8016459f23afbf97b73a3c571cee99f0666a3bb3bd22fff8301cb07a7bf41341c
7
+ data.tar.gz: b8d80ae9990539c66431a7932042d2c4f66fd2ac8b3499b1a1e5abd872d1a544cc18162147c668710a025fae2fc7eede16fe8f8da341e43427268200ed8b1940
@@ -0,0 +1,96 @@
1
+
2
# Statistical extensions mixed into every Enumerable: information-theoretic
# measures (entropy), central tendency (mean/median), dispersion
# (variance/stdev/covariance), correlation (pearson) and vector
# normalisation helpers used by the Matrix extension.
module Enumerable
  # Shannon entropy (in bits) of the value distribution.
  # Returns 0.0 when every element is identical; empty input yields 0.0.
  def entropy
    counts = Hash.new(0)
    each { |x| counts[x] += 1 }

    # Derive the total from the tally so this also works for enumerables
    # that do not respond to #size.
    total = counts.values.reduce(0, :+).to_f
    counts.values.reduce(0.0) do |acc, c|
      p = c / total
      acc - p * Math.log2(p)
    end
  end

  # Conditional entropy H(label | self): the entropy of +label+ within each
  # group of equal values of +self+, weighted by the group's frequency.
  # +label+ must be index-addressable and parallel to +self+.
  def conditional_entropy_with(label)
    groups = Hash.new { |h, k| h[k] = [] }
    each_with_index { |v, i| groups[v] << label[i] }

    n = size.to_f
    groups.values.reduce(0.0) { |acc, vals| acc + (vals.size / n) * vals.entropy }
  end
  # Backward-compatible alias for the original (misspelled) public name.
  alias concitional_entropy_with conditional_entropy_with

  # Only provide #sum where the platform lacks it: Ruby >= 2.4 ships a
  # faster, block-aware Enumerable#sum that must not be clobbered.
  unless method_defined?(:sum)
    def sum
      inject(0) { |acc, v| acc + v }
    end
  end

  # Arithmetic mean as a Float.
  def mean
    sum / length.to_f
  end

  # Euclidean (L2) norm of the values.
  # NOTE: despite the name this is *not* the geometric mean; the name is
  # kept for backward compatibility and #l2_normalize relies on it.
  def geo_mean
    Math.sqrt(inject(0) { |acc, v| acc + v * v })
  end

  # Median: middle element for odd lengths, average of the two middle
  # elements (as a Float) for even lengths.
  def median
    sorted = sort
    mid = sorted.length / 2
    sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0
  end

  # Returns [sum, mean] computed in a single pass over the sum.
  def sum_and_mean
    s = sum
    [s, s / length.to_f]
  end

  # Sample variance. ddof=1 (default) gives the unbiased estimator;
  # pass ddof=0 for the population variance.
  def variance(ddof = 1)
    m = mean
    inject(0) { |acc, v| acc + (v - m)**2 } / (length - ddof).to_f
  end

  # Sample standard deviation (square root of #variance).
  def stdev(ddof = 1)
    Math.sqrt(variance(ddof))
  end

  # Returns [variance, stdev] without recomputing the variance twice.
  def variance_and_stdev(ddof = 1)
    v = variance(ddof)
    [v, Math.sqrt(v)]
  end

  # Sample covariance between self and +arr+ (parallel, equal length).
  def covariance(arr, ddof = 1)
    raise "array length error" if arr.length != length

    xbar = mean
    ybar = arr.mean

    acc = 0.0
    arr.length.times { |i| acc += (self[i] - xbar) * (arr[i] - ybar) }
    acc / (length - ddof).to_f
  end

  # Pearson correlation coefficient between self and +arr+ (-1.0 .. 1.0).
  def pearson(arr, ddof = 1)
    covariance(arr, ddof) / (stdev(ddof) * arr.stdev(ddof))
  end

  # Scales the values so they sum to 1 (L1 unit norm).
  def l1_normalize
    total = sum.to_f
    map { |x| x / total }
  end

  # Scales the values to unit Euclidean length (L2 unit norm).
  def l2_normalize
    norm = geo_mean
    map { |x| x / norm }
  end
end
96
+
@@ -0,0 +1,87 @@
1
+ require 'json'
2
+ require 'matrix'
3
+ require './enumerable_extension'
4
+ require './matrix_extension'
5
+
6
module LinearRegression
  # Shared behaviour for the concrete regression strategies: prediction
  # and goodness-of-fit scoring over a trained coefficient vector @beta.
  #
  # Training data is an array of hashes shaped
  #   { label: Numeric, features: [Numeric, ...] }
  class LinearRegressionBase
    # Learned coefficient column vector; the last row is the intercept.
    attr_reader :beta

    # Fits the model to +entries+. Subclasses must override this.
    def train(entries)
      raise NotImplementedError, "#{self.class}#train is not implemented"
    end

    # Predicts the label for one feature vector by appending the bias
    # term (constant 1) and multiplying with the learned coefficients.
    def predict(vector)
      x = Matrix[vector + [1]]
      (x * @beta)[0, 0]
    end

    # Coefficient of determination (R^2) on +new_entries+.
    # 1.0 is a perfect fit; poor models can score below zero.
    def r_squared_score(new_entries)
      labels = new_entries.map { |e| e[:label] }
      y_bar = labels.mean

      # Total sum of squares (variance of the labels around their mean).
      ss_tot = labels.reduce(0.0) { |acc, v| acc + (v - y_bar)**2 }

      # Residual sum of squares (prediction error).
      ss_res = new_entries.reduce(0.0) do |acc, e|
        acc + (e[:label] - predict(e[:features]))**2
      end

      1 - (ss_res / ss_tot)
    end
  end

  # Closed-form ordinary least squares: beta = (X'X)^-1 X' y.
  class AlternatingLeastSquares < LinearRegressionBase
    def train(entries)
      labels   = entries.map { |e| e[:label] }
      features = entries.map { |e| e[:features] + [1] } # append bias column

      @dimension = features[0].size

      y = Matrix[labels].t
      x = Matrix[*features]

      @beta = (x.t * x).inv * x.t * y
    end
  end

  # alias
  ALS = AlternatingLeastSquares

  # Iterative full-batch gradient descent minimising squared error.
  class GradientDescent < LinearRegressionBase
    # num_iter:: number of full-batch update steps.
    # alpha::    learning rate.
    def initialize(num_iter: 100, alpha: 0.01)
      @num_iter = num_iter
      @alpha = alpha
    end

    # Trains on +entries+. When a block is given it is yielded
    # (iteration, beta, loss) after every update, for progress tracking.
    def train(entries, &block)
      labels   = entries.map { |e| e[:label] }
      features = entries.map { |e| e[:features] + [1] } # append bias column

      @dimension = features[0].size

      y = Matrix[labels].t
      x = Matrix[*features]

      # Start from an all-ones coefficient column vector.
      @beta = Matrix[Array.new(@dimension, 1)].t

      @num_iter.times do |i|
        prediction = x * @beta
        loss = prediction - y
        gradient = (x.t * loss) / entries.size
        @beta -= @alpha * gradient
        yield i, @beta, loss if block_given?
      end
    end
  end

  # alias
  GD = GradientDescent
end
@@ -0,0 +1,65 @@
1
+
2
# Extensions to the stdlib Matrix class: column covariance, row-wise
# normalisation, JSON serialisation and a multi-line pretty printer.
class Matrix
  # Sample covariance matrix of the columns (each column is a variable).
  # Relies on Array#variance / Array#covariance from the Enumerable
  # extension; only the upper triangle is computed, then mirrored.
  def covariance_matrix
    dim = column_size
    buff = Array.new(dim) { Array.new(dim, 0) }
    0.upto(dim - 1) do |i|
      i.upto(dim - 1) do |j|
        if i == j
          buff[i][j] = column(i).to_a.variance
        else
          cov = column(i).to_a.covariance(column(j).to_a)
          buff[i][j] = cov
          buff[j][i] = cov
        end
      end
    end
    Matrix[*buff]
  end

  # Returns a new matrix with every row scaled to unit L1 norm.
  def l1_normalize
    Matrix[*row_size.times.map { |i| row(i).to_a.l1_normalize }]
  end

  # Returns a new matrix with every row scaled to unit L2 norm.
  def l2_normalize
    Matrix[*row_size.times.map { |i| row(i).to_a.l2_normalize }]
  end

  # Serialises the matrix as a JSON array of row arrays. The argument
  # list is splatted through to Array#to_json so JSON generator state
  # (e.g. from pretty generation) is forwarded correctly — the original
  # passed the whole argument array as a single argument.
  def to_json(*param)
    rows = row_size.times.map { |i| row(i).to_a }
    rows.to_json(*param)
  end

  # Human-readable multi-line rendering, with rows aligned under the
  # opening "Matrix[" and values separated by ",\t", e.g.
  #   Matrix[[1,	2],
  #          [3,	4]]
  def inspect
    buff = ""
    row_size.times do |i|
      buff << (i.zero? ? "Matrix[" : "       ")
      buff << "[" << row(i).to_a.join(",\t") << "]"
      buff << (i == row_size - 1 ? "]" : ",\n")
    end
    buff
  end

  alias to_s inspect
end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: linear-regressions
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - ireullin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-06-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Linear regression algorithms which implemented Alternating Least Squares
14
+ & Gradient Descent
15
+ email:
16
+ - ireullin@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/enumerable_extension.rb
22
+ - lib/linear_regressions.rb
23
+ - lib/matrix_extension.rb
24
+ homepage: https://github.com/ireullin/linear-regressions
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.2.2
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: Linear regression algorithms which implemented Alternating Least Squares
48
+ & Gradient Descent
49
+ test_files: []