principal-components-analysis 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/enumerable_extension.rb +96 -0
- data/lib/matrix_extension.rb +65 -0
- data/lib/principal-components-analysis.rb +38 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 30d9d35d743b426922569afb41295639cb782933
|
4
|
+
data.tar.gz: dc2eb5a6667402b76e5cc34152e75f73e147c3e2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cc77abe7776c57c6fb4e610c03520ffd8410030c6614420761c896f27363a6a61e2bf0596f4f2993fb7ae94f549ae90c70481ba000c4f73b212ae6ee3961c7c3
|
7
|
+
data.tar.gz: a805c60a0611fb571c82674b19bc1fe3757c643ef472c47365df73c6e60295297af72435cf8c4abeee2fe4d4b8e83618d0a5f159f17dd54d033f542c01d31729
|
@@ -0,0 +1,96 @@
|
|
1
|
+
|
2
|
+
# Statistical helpers mixed into every Enumerable.
#
# Several helpers call +length+ and/or +[]+, which Enumerable itself does not
# provide, so in practice they are meant for Array-like receivers.
module Enumerable
  # Shannon entropy (in bits) of the distribution of distinct elements.
  #
  # @return [Float] 0.0 for an empty collection
  def entropy
    counts = Hash.new(0)
    each { |item| counts[item] += 1 }

    # Derive the total from the tallies so the method does not depend on
    # the receiver responding to +size+.
    total = counts.values.inject(0) { |acc, count| acc + count }

    counts.values.inject(0.0) do |acc, count|
      p = count.to_f / total
      acc + (-p) * Math.log2(p)
    end
  end

  # Entropy of +label+ conditioned on the receiver's values: elements are
  # grouped by value, and the entropy of each group's labels is weighted by
  # the group's share of the collection.
  #
  # The method name is a historical misspelling kept for compatibility; see
  # the +conditional_entropy_with+ alias below.
  #
  # @param label [#[]] labels, indexed in the same order as the receiver
  # @return [Float]
  def concitional_entropy_with(label)
    groups = Hash.new { |hash, key| hash[key] = [] }
    each_with_index { |value, index| groups[value] << label[index] }

    total = groups.values.inject(0) { |acc, labels| acc + labels.size }

    groups.values.inject(0.0) do |acc, labels|
      acc + (labels.size.to_f / total) * labels.entropy
    end
  end
  alias conditional_entropy_with concitional_entropy_with

  # Ruby >= 2.4 ships Enumerable#sum (with an optional initial value and a
  # block form). Redefining it here would silently break those call styles
  # for every non-Array enumerable, so only provide a fallback when missing.
  unless method_defined?(:sum)
    # Sum of all elements, starting from 0.
    def sum
      inject(0) { |accum, i| accum + i }
    end
  end

  # Arithmetic mean.
  #
  # @return [Float] NaN for an empty collection of numbers
  def mean
    sum / length.to_f
  end

  # Euclidean (L2) norm: the square root of the sum of squares.
  # Despite its name this is not a geometric mean.
  #
  # @return [Float]
  def geo_mean
    Math.sqrt(inject(0) { |accum, i| accum + i * i })
  end

  # Median of the elements.
  #
  # @return [Object, Float, nil] the middle element for an odd count, the
  #   mean of the two middle elements for an even count, nil when empty
  def median
    sorted = sort
    return nil if sorted.empty?

    mid = sorted.length / 2
    sorted.length.odd? ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0
  end

  # @return [Array] two-element array +[sum, mean]+
  def sum_and_mean
    total = sum
    [total, total / length.to_f]
  end

  # Variance around the mean.
  #
  # @param ddof [Integer] delta degrees of freedom; the divisor is
  #   +length - ddof+ (1 = sample variance, 0 = population variance)
  # @return [Float]
  def variance(ddof = 1)
    m = mean
    squares = inject(0) { |accum, i| accum + (i - m)**2 }
    squares / (length - ddof).to_f
  end

  # Standard deviation: square root of {#variance}.
  #
  # @param ddof [Integer] see {#variance}
  # @return [Float]
  def stdev(ddof = 1)
    Math.sqrt(variance(ddof))
  end

  # @param ddof [Integer] see {#variance}
  # @return [Array<Float>] two-element array +[variance, stdev]+
  def variance_and_stdev(ddof = 1)
    var = variance(ddof)
    [var, Math.sqrt(var)]
  end

  # Covariance between the receiver and +arr+.
  #
  # @param arr [Array] values paired element-wise with the receiver
  # @param ddof [Integer] see {#variance}
  # @return [Float]
  # @raise [RuntimeError] when the two collections differ in length
  def covariance(arr, ddof = 1)
    raise "array length error" if arr.length != length

    xbar = mean
    ybar = arr.mean

    accum = zip(arr).inject(0.0) do |acc, (x, y)|
      acc + (x - xbar) * (y - ybar)
    end
    accum / (length - ddof).to_f
  end

  # Pearson correlation coefficient between the receiver and +arr+.
  #
  # @param arr [Array] values paired element-wise with the receiver
  # @param ddof [Integer] see {#variance}
  # @return [Float]
  def pearson(arr, ddof = 1)
    covariance(arr, ddof) / (stdev(ddof) * arr.stdev(ddof))
  end

  # Divides every element by the sum of the elements.
  #
  # @return [Array<Float>]
  def l1_normalize
    total = sum.to_f
    map { |x| x / total }
  end

  # Divides every element by the Euclidean norm ({#geo_mean}).
  #
  # @return [Array<Float>]
  def l2_normalize
    norm = geo_mean
    map { |x| x / norm }
  end
end
|
96
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
|
2
|
+
# Extensions to the standard library Matrix class.
#
# +covariance_matrix+, +l1_normalize+ and +l2_normalize+ rely on the Array
# helpers (+variance+, +covariance+, +l1_normalize+, +l2_normalize+) being
# available on Array.
class Matrix
  # Covariance matrix of the columns: entry (i, j) is the covariance between
  # column i and column j, with the variance of column i on the diagonal.
  #
  # @return [Matrix] square matrix of size +column_size+
  def covariance_matrix
    dim = column_size
    # Extract each column once instead of once per (i, j) pair.
    columns = Array.new(dim) { |i| column(i).to_a }
    buff = Array.new(dim) { Array.new(dim, 0) }

    columns.each_with_index do |col_i, i|
      buff[i][i] = col_i.variance
      (i + 1).upto(dim - 1) do |j|
        cov = col_i.covariance(columns[j])
        # Covariance is symmetric, so fill both triangles at once.
        buff[i][j] = cov
        buff[j][i] = cov
      end
    end
    Matrix[*buff]
  end

  # @return [Matrix] a matrix whose rows are the receiver's rows passed
  #   through Array#l1_normalize
  def l1_normalize
    Matrix[*Array.new(row_size) { |i| row(i).to_a.l1_normalize }]
  end

  # @return [Matrix] a matrix whose rows are the receiver's rows passed
  #   through Array#l2_normalize
  def l2_normalize
    Matrix[*Array.new(row_size) { |i| row(i).to_a.l2_normalize }]
  end

  # Serializes the matrix as a JSON array of row arrays.
  #
  # @param param [Array] arguments supplied by the JSON generator (typically
  #   the generator state); forwarded unchanged to Array#to_json
  # @return [String]
  def to_json(*param)
    # Lazy-load so that calling #to_json directly works even when the caller
    # has not required json yet.
    require 'json'
    rows = Array.new(row_size) { |i| row(i).to_a }
    # Splat the arguments: passing the rest-array itself as the state makes
    # the generator fall back to defaults and drop e.g. pretty-print options.
    rows.to_json(*param)
  end

  # Multi-line representation: one row per line, elements separated by a
  # comma and a tab.
  #
  # @return [String] "Matrix[]" when the matrix has no rows
  def inspect
    return 'Matrix[]' if row_size.zero?

    lines = Array.new(row_size) { |i| "[#{row(i).to_a.join(",\t")}]" }
    "Matrix[#{lines.join(",\n ")}]"
  end

  alias to_s inspect
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'matrix'
|
2
|
+
require 'matrix_extension'
|
3
|
+
require 'enumerable_extension'
|
4
|
+
|
5
|
+
# Principal component analysis over a list of equally sized numeric rows.
#
# The covariance matrix of the columns is eigen-decomposed once at
# construction; #reduce then projects the entries onto the leading
# eigenvectors.
class PCA
  # @return [Array<Hash>] one +{ value:, vector: }+ hash per dimension,
  #   sorted by eigenvalue in descending order
  attr_reader :eigen

  # @param entries [Array<Array<Numeric>>] data rows; the width of the first
  #   row is taken as the dimension
  def initialize(entries)
    @dimension = entries[0].size
    @entries = Matrix[*entries]
    eigen_vectors, eigen_values, _v_inv = @entries.covariance_matrix.eigensystem

    # Matrix#eigensystem returns V with the eigenvectors as its COLUMNS
    # (and D with the eigenvalues on the diagonal), so eigenvector i is
    # column i of V, not row i.
    @eigen = Array.new(@dimension) do |i|
      { value: eigen_values[i, i], vector: eigen_vectors.column(i).to_a }
    end
    @total_eigenvalue = @eigen.inject(0.0) { |acc, pair| acc + pair[:value] }
    @eigen.sort_by! { |pair| -pair[:value] }
  end

  # Projects the entries onto the +reducing_dimension+ eigenvectors with the
  # largest eigenvalues. The entries are multiplied as given (they are not
  # mean-centered here).
  #
  # @param reducing_dimension [Integer] number of components to keep,
  #   between 1 and the data dimension
  # @return [Array(Matrix, Float)] the projected matrix (one row per entry,
  #   one column per kept component) and the distortion rate, i.e. one minus
  #   the share of the total eigenvalue carried by the kept components
  # @raise [ArgumentError] when +reducing_dimension+ is out of range
  def reduce(reducing_dimension = 1)
    unless reducing_dimension.is_a?(Integer) && reducing_dimension.between?(1, @dimension)
      raise ArgumentError, "reducing_dimension must be an Integer in 1..#{@dimension}"
    end

    kept = @eigen.first(reducing_dimension)
    factor = Matrix[*kept.map { |pair| pair[:vector] }]
    sum_eigenvalue = kept.inject(0.0) { |acc, pair| acc + pair[:value] }

    reduced_matrix = @entries * factor.t
    distortion_rate = 1 - (sum_eigenvalue / @total_eigenvalue)
    [reduced_matrix, distortion_rate]
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: principal-components-analysis
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ireullin
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-06-02 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A PCA algorithm for reducing dimension
|
14
|
+
email:
|
15
|
+
- ireullin@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/enumerable_extension.rb
|
21
|
+
- lib/matrix_extension.rb
|
22
|
+
- lib/principal-components-analysis.rb
|
23
|
+
homepage: https://github.com/ireullin/principal-components-analysis
|
24
|
+
licenses:
|
25
|
+
- MIT
|
26
|
+
metadata: {}
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
requirements: []
|
42
|
+
rubyforge_project:
|
43
|
+
rubygems_version: 2.2.2
|
44
|
+
signing_key:
|
45
|
+
specification_version: 4
|
46
|
+
summary: A PCA algorithm for reducing dimension
|
47
|
+
test_files: []
|