principal-components-analysis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 30d9d35d743b426922569afb41295639cb782933
4
+ data.tar.gz: dc2eb5a6667402b76e5cc34152e75f73e147c3e2
5
+ SHA512:
6
+ metadata.gz: cc77abe7776c57c6fb4e610c03520ffd8410030c6614420761c896f27363a6a61e2bf0596f4f2993fb7ae94f549ae90c70481ba000c4f73b212ae6ee3961c7c3
7
+ data.tar.gz: a805c60a0611fb571c82674b19bc1fe3757c643ef472c47365df73c6e60295297af72435cf8c4abeee2fe4d4b8e83618d0a5f159f17dd54d033f542c01d31729
@@ -0,0 +1,96 @@
1
+
2
# Statistical helpers added to every Enumerable.
#
# Most helpers call +length+ or +size+ on the receiver (and +covariance+ also
# indexes it with +[]+), so in practice the receiver must be an Array or an
# object that offers the same methods.
module Enumerable
  # Shannon entropy (base 2) of the distribution of distinct elements.
  #
  # Returns a Float; an empty receiver yields 0.0.
  def entropy
    counts = Hash.new(0)
    each { |x| counts[x] += 1 }

    total = size
    counts.values.inject(0.0) do |acc, n|
      prob = n.to_f / total
      acc + (-prob) * Math.log2(prob)
    end
  end

  # Entropy of +label+ conditioned on the receiver's values: elements of
  # +label+ are grouped by the receiver value at the same index, and the
  # entropy of every group is weighted by the group's relative size.
  #
  # label - indexable collection aligned with the receiver.
  #
  # Returns a Float.
  def concitional_entropy_with(label)
    groups = Hash.new { |h, k| h[k] = [] }
    each_with_index { |v, i| groups[v] << label[i] }

    total = size
    groups.values.inject(0.0) do |acc, labels|
      acc + (labels.size.to_f / total) * labels.entropy
    end
  end
  # The original method name is misspelled; it is kept for compatibility and
  # the correctly spelled name is offered as an alias.
  alias conditional_entropy_with concitional_entropy_with

  # Ruby 2.4+ already ships Enumerable#sum (with an optional initial value and
  # block). Only define a fallback on older interpreters so the built-in,
  # more capable implementation is never clobbered.
  unless method_defined?(:sum)
    # Sum of the elements (or of the block results), starting from +init+.
    def sum(init = 0)
      inject(init) { |acc, x| acc + (block_given? ? yield(x) : x) }
    end
  end

  # Arithmetic mean as a Float (NaN for an empty receiver).
  def mean
    sum / length.to_f
  end

  # Square root of the sum of squares of the elements.
  #
  # NOTE: despite its name this is the Euclidean (L2) norm, not a geometric
  # mean; the name is kept because callers (e.g. l2_normalize) rely on it.
  def geo_mean
    Math.sqrt(inject(0) { |acc, x| acc + x * x })
  end

  # Median of the elements.
  #
  # Returns the middle element for an odd count, the Float average of the two
  # middle elements for an even count, and nil for an empty receiver.
  def median
    sorted = sort
    return if sorted.empty?

    mid = sorted.length / 2
    sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0
  end

  # Returns [sum, mean] computed from a single summation.
  def sum_and_mean
    total = sum
    [total, total / length.to_f]
  end

  # Variance of the elements.
  #
  # ddof - delta degrees of freedom; the divisor is (length - ddof), so the
  #        default of 1 gives the sample variance and 0 the population one.
  #
  # Returns a Float.
  def variance(ddof = 1)
    center = mean
    squares = inject(0) { |acc, x| acc + (x - center)**2 }
    squares / (length - ddof).to_f
  end

  # Standard deviation: square root of #variance with the same +ddof+.
  def stdev(ddof = 1)
    Math.sqrt(variance(ddof))
  end

  # Returns [variance, standard deviation] for the given +ddof+.
  def variance_and_stdev(ddof = 1)
    var = variance(ddof)
    [var, Math.sqrt(var)]
  end

  # Covariance between the receiver and +arr+.
  #
  # arr  - collection of the same length as the receiver.
  # ddof - delta degrees of freedom; the divisor is (length - ddof).
  #
  # Returns a Float.
  # Raises RuntimeError ("array length error") when the lengths differ.
  def covariance(arr, ddof = 1)
    raise "array length error" if arr.length != length

    xbar = mean
    ybar = arr.mean
    acc = 0.0
    arr.length.times { |i| acc += (self[i] - xbar) * (arr[i] - ybar) }
    acc / (length - ddof).to_f
  end

  # Pearson correlation coefficient between the receiver and +arr+.
  def pearson(arr, ddof = 1)
    covariance(arr, ddof) / (stdev(ddof) * arr.stdev(ddof))
  end

  # Scales the elements so that their absolute values add up to 1.
  #
  # The divisor is the L1 norm (sum of absolute values); dividing by the plain
  # sum would flip signs for negative input and break on mixed-sign input.
  #
  # Returns a new Array of Floats.
  def l1_normalize
    norm = map(&:abs).sum.to_f
    map { |x| x / norm }
  end

  # Scales the elements so that their Euclidean norm is 1.
  #
  # Returns a new Array of Floats.
  def l2_normalize
    norm = geo_mean
    map { |x| x / norm }
  end
end
96
+
@@ -0,0 +1,65 @@
1
+
2
# Extensions to the standard library Matrix class.
#
# covariance_matrix, l1_normalize and l2_normalize call the statistical
# helpers (variance, covariance, l1_normalize, l2_normalize) on plain Arrays,
# so those helpers must be loaded before these methods are used.
class Matrix
  # Covariance matrix of the columns: entry (i, j) is the covariance between
  # column i and column j, with the column variances on the diagonal.
  #
  # Returns a square Matrix with column_size rows and columns.
  def covariance_matrix
    # Extract every column once instead of on each loop iteration.
    cols = column_vectors.map(&:to_a)
    dim = cols.size
    cov = Array.new(dim) { Array.new(dim, 0) }

    cols.each_with_index do |col, i|
      cov[i][i] = col.variance
      # The matrix is symmetric, so each off-diagonal pair is computed once.
      ((i + 1)...dim).each do |j|
        cov[i][j] = cov[j][i] = col.covariance(cols[j])
      end
    end
    Matrix[*cov]
  end

  # Returns a new Matrix whose rows are each passed through Array#l1_normalize.
  def l1_normalize
    Matrix[*to_a.map(&:l1_normalize)]
  end

  # Returns a new Matrix whose rows are each passed through Array#l2_normalize.
  def l2_normalize
    Matrix[*to_a.map(&:l2_normalize)]
  end

  # Serializes the matrix as a JSON array of row arrays.
  #
  # param - arguments supplied by the JSON generator (typically its state);
  #         they are splatted through unchanged so that nested generation
  #         (e.g. JSON.generate on a Hash containing a Matrix) keeps working.
  #
  # Relies on Array#to_json, i.e. the caller must have loaded a JSON library.
  def to_json(*param)
    to_a.to_json(*param)
  end

  # Multi-line, tab-separated representation, one matrix row per line.
  #
  # Returns "Matrix[]" when the matrix has no rows.
  def inspect
    rows = to_a.map { |row| "[" + row.join(",\t") + "]" }
    "Matrix[" + rows.join(",\n ") + "]"
  end

  alias to_s inspect
end
@@ -0,0 +1,38 @@
1
+ require 'matrix'
2
+ require 'matrix_extension'
3
+ require 'enumerable_extension'
4
+
5
# Principal component analysis over a list of equally sized numeric rows.
#
# Depends on Matrix#covariance_matrix being available on the standard library
# Matrix class.
class PCA
  # Eigen pairs of the covariance matrix, sorted by descending eigenvalue.
  # Each element is a Hash of the form { value: Numeric, vector: Array }.
  attr_reader :eigen

  # entries - Array of rows (Arrays of numbers); the size of the first row is
  #           taken as the dimension of the data.
  def initialize(entries)
    @dimension = entries[0].size
    @entries = Matrix[*entries]

    decomposition = @entries.covariance_matrix.eigensystem
    values = decomposition.eigenvalues
    # Matrix#eigensystem factors M as V * D * V^-1, so the eigenvector that
    # belongs to eigenvalue i is COLUMN i of V (reading rows would only be
    # right when V happens to be symmetric).
    vectors = decomposition.eigenvector_matrix

    @eigen = Array.new(@dimension) do |i|
      { value: values[i], vector: vectors.column(i).to_a }
    end
    @total_eigenvalue = @eigen.inject(0.0) { |acc, pair| acc + pair[:value] }
    @eigen.sort_by! { |pair| -pair[:value] }
  end

  # Projects the entries onto the leading principal components.
  #
  # The entries are multiplied as stored; no mean-centering is applied here.
  #
  # reducing_dimension - number of components to keep (1..dimension).
  #
  # Returns [reduced_matrix, distortion_rate] where reduced_matrix has one
  # column per kept component and distortion_rate is the share of the total
  # eigenvalue mass that was discarded (0 when every component is kept).
  # Raises ArgumentError when reducing_dimension is outside 1..dimension.
  def reduce(reducing_dimension = 1)
    unless (1..@dimension).cover?(reducing_dimension)
      raise ArgumentError, "reducing_dimension must be between 1 and #{@dimension}"
    end

    kept = @eigen.first(reducing_dimension)
    factor = Matrix[*kept.map { |pair| pair[:vector] }]
    kept_eigenvalue = kept.inject(0.0) { |acc, pair| acc + pair[:value] }

    reduced_matrix = @entries * factor.t
    distortion_rate = 1 - (kept_eigenvalue / @total_eigenvalue)
    [reduced_matrix, distortion_rate]
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: principal-components-analysis
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - ireullin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-06-02 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A PCA algorithm for reducing dimension
14
+ email:
15
+ - ireullin@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/enumerable_extension.rb
21
+ - lib/matrix_extension.rb
22
+ - lib/principal-components-analysis.rb
23
+ homepage: https://github.com/ireullin/principal-components-analysis
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 2.2.2
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: A PCA algorithm for reducing dimension
47
+ test_files: []