pca 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/pca.rb +83 -0
  3. metadata +59 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8dc374097222849bba01f272a8df8d019bdefad9
4
+ data.tar.gz: 70af4ff18c1cb5796b54308ea43f1b202fcca8ed
5
+ SHA512:
6
+ metadata.gz: 6c2508f2c7a86e677330736ea733e8fedb525301ae3d1c1e6ad00e95dc7be07ba50aa4a4ef5e27cde958bb0091fd0310e8d8ae44ec2cc0987928c26da7c0bd3d
7
+ data.tar.gz: f708f797cdb4811680ed4a4390db9b918555fec1c8033c44356ebe64950697b6d09855cb93fec8fcb56dab3930e66a17171fcf8e0953f7518b5259f9a87bcbc2
@@ -0,0 +1,83 @@
1
+ require 'gsl'
2
+
3
# Principal Component Analysis built on rb-gsl matrices.
#
# Typical use:
#
#   pca = PCA.new(components: 2)
#   reduced  = pca.fit_transform(data)
#   restored = pca.inverse_transform(reduced)
#
# Input data may be a GSL::Matrix, an Array of row Arrays, or any object
# responding to +to_gm+. Rows are treated as observations and columns as
# variables (the mean is computed and subtracted per column).
class PCA
  # Raised when #transform or #inverse_transform is called before the model
  # has been fitted with #fit or #fit_transform.
  class NotFittedError < StandardError; end

  # Results of the most recent fit (all nil until #fit / #fit_transform runs):
  #
  # components::               leading columns of U from the SVD of the
  #                            covariance matrix; data is projected onto these
  # singular_values::          matching singular values of the covariance matrix
  # mean::                     Array of per-column means of the fitted data
  # explained_variance::       singular_values**2 / number of rows
  # explained_variance_ratio:: explained_variance divided by its own sum, i.e.
  #                            normalised over the *retained* components only
  attr_reader :components, :singular_values, :mean, :explained_variance, :explained_variance_ratio

  # @param opts [Hash]
  # @option opts [Integer, nil] :components number of components to keep;
  #   when omitted, every component is kept
  def initialize(opts = {})
    @n_components = opts[:components]
  end

  # Learns the column means and principal components of +x+.
  #
  # @param x [GSL::Matrix, Array<Array<Numeric>>, #to_gm] input data (not modified)
  # @return [PCA] self, so calls can be chained
  def fit(x)
    _fit(prepare_data(x))
    self
  end

  # Centres +x+ with the mean saved by the last fit and projects it onto the
  # fitted components.
  #
  # @param x [GSL::Matrix, Array<Array<Numeric>>, #to_gm] input data (not modified)
  # @return [GSL::Matrix] the projected data
  # @raise [NotFittedError] if the model has not been fitted yet
  def transform(x)
    assert_fitted
    _transform(prepare_data(x, use_saved_mean: true))
  end

  # Fits the model on +x+ and returns +x+ projected onto the new components.
  #
  # @param x [GSL::Matrix, Array<Array<Numeric>>, #to_gm] input data (not modified)
  # @return [GSL::Matrix] the projected data
  def fit_transform(x)
    centered = prepare_data(x)
    _fit(centered)
    _transform(centered)
  end

  # Maps projected data back into the original variable space by multiplying
  # with the transposed components and adding the saved column means back.
  #
  # @param x [GSL::Matrix, Array<Array<Numeric>>, #to_gm] projected data
  # @return [GSL::Matrix] the reconstruction
  # @raise [NotFittedError] if the model has not been fitted yet
  def inverse_transform(x)
    assert_fitted
    out = ensure_matrix(x) * @components.transpose
    out.size2.times { |col| out.col(col).add!(@mean[col]) }
    out
  end

  private

  # Fails fast with a descriptive error instead of a NoMethodError on nil.
  def assert_fitted
    return if @components && @mean

    raise NotFittedError, 'PCA has not been fitted; call fit or fit_transform first'
  end

  # Converts +x+ to a matrix and mean-centres it, recomputing @mean unless
  # opts[:use_saved_mean] is set.
  def prepare_data(x, opts = {})
    matrix = ensure_matrix(x)
    # ensure_matrix hands a GSL::Matrix argument back untouched, and centring
    # below happens in place -- work on a copy so the caller's data survives.
    matrix = matrix.clone if matrix.equal?(x)
    @mean = calculate_mean(matrix) unless opts[:use_saved_mean]
    mean_normalize(matrix)
    matrix
  end

  # Runs an SVD on the covariance matrix of the (already centred) data and
  # stores the components and variance statistics, truncated to
  # @n_components when that option was given.
  def _fit(x)
    covariance_matrix = (x.transpose * x) / x.size1
    u, _v, s = covariance_matrix.SV_decomp
    @components = slice_n(u)
    @singular_values = slice_n(s)
    # NOTE(review): these are singular values of the covariance matrix, which
    # would already be on a variance scale; squaring and dividing by the row
    # count looks like the formula for an SVD of the data matrix itself --
    # confirm the intended scaling before relying on absolute values.
    @explained_variance = @singular_values**2 / x.size1
    # Computed after truncation, so the ratios always sum to 1.
    @explained_variance_ratio = @explained_variance / @explained_variance.sum
  end

  # Projects centred data onto the fitted components.
  def _transform(x)
    x * @components
  end

  # Coerces supported inputs to a GSL::Matrix; a GSL::Matrix is returned as-is
  # (same object, no copy).
  def ensure_matrix(x)
    case x
    when GSL::Matrix then x
    when Array then GSL::Matrix[*x]
    else x.to_gm
    end
  end

  # @return [Array] the mean of every column of +x+
  def calculate_mean(x)
    x.size2.times.map { |col| x.col(col).mean }
  end

  # Subtracts the saved column means from +x+ in place.
  def mean_normalize(x)
    x.size2.times { |col| x.col(col).sub!(@mean[col]) }
  end

  # Truncates a matrix (by columns) or a vector to the first @n_components
  # entries; returns +x+ unchanged when no component limit was configured.
  def slice_n(x)
    return x unless @n_components

    case x
    when GSL::Matrix then x.submatrix(nil, 0, @n_components)
    when GSL::Vector then x[0, @n_components]
    end
  end
end
metadata ADDED
@@ -0,0 +1,59 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pca
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Geoff Buesing
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rb-gsl
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: Principal Component Analysis
28
+ email: gbuesing@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/pca.rb
34
+ homepage: https://github.com/gbuesing/pca
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.4.5
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Principal Component Analysis
58
+ test_files: []
59
+ has_rdoc: