rbcluster 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +9 -0
- data/.travis.yml +6 -0
- data/Gemfile +4 -0
- data/LICENSE +29 -0
- data/README.md +54 -0
- data/Rakefile +17 -0
- data/examples/simple_kcluster.rb +10 -0
- data/ext/rbcluster/cluster.c +4598 -0
- data/ext/rbcluster/cluster.h +93 -0
- data/ext/rbcluster/extconf.rb +6 -0
- data/ext/rbcluster/rbcluster.c +775 -0
- data/lib/rbcluster.rb +5 -0
- data/lib/rbcluster/tree.rb +20 -0
- data/lib/rbcluster/version.rb +3 -0
- data/rbcluster.gemspec +24 -0
- data/spec/clustercentroids_spec.rb +6 -0
- data/spec/clusterdistance_spec.rb +106 -0
- data/spec/clustermedoids_spec.rb +6 -0
- data/spec/cuttree_spec.rb +6 -0
- data/spec/kcluster_spec.rb +95 -0
- data/spec/kmedoids_spec.rb +86 -0
- data/spec/median_mean_spec.rb +26 -0
- data/spec/node_spec.rb +27 -0
- data/spec/pca_spec.rb +113 -0
- data/spec/somcluster_spec.rb +81 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/treecluster_spec.rb +412 -0
- metadata +110 -0
data/lib/rbcluster/tree.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Cluster
  # Wraps a collection of Node objects.
  #
  # Construction is currently disabled: #initialize raises
  # NotImplementedError before doing anything else, so no instance can be
  # created through Tree.new yet.
  class Tree
    # @param nodes [#each_with_index, #size] collection of Node objects
    # @raise [NotImplementedError] always, until the class is implemented
    # @raise [ArgumentError] (once enabled) if any element is not a Node
    def initialize(nodes)
      raise NotImplementedError, "patches welcome :)"

      # Everything below is unreachable until the raise above is removed.
      nodes.each_with_index do |node, idx|
        unless node.kind_of?(Node)
          # Interpolate the expected class itself; `Node.class` is always `Class`.
          raise ArgumentError, "expected #{Node}, got #{node.class} at index #{idx}"
        end
      end

      @nodes = nodes
    end

    # @return [Integer] number of nodes held by this tree
    def size
      @nodes.size
    end
  end
end
|
data/rbcluster.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.push lib_dir
require "rbcluster/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "rbcluster"
  spec.version  = Cluster::VERSION
  spec.platform = Gem::Platform::RUBY

  # People and project information
  spec.authors     = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
  spec.email       = ["jari.bakken@gmail.com"]
  spec.homepage    = "http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm"
  spec.summary     = "Ruby bindings for the Cluster C library"
  spec.description = "This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster)."

  spec.rubyforge_project = "rbcluster"

  # Packaged files are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- spec/*`.split("\n")
  spec.require_paths = ["lib"]
  spec.extensions    = `git ls-files -- ext/**/extconf.rb`.split("\n")

  # Development-only dependencies
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "rspec", "~> 2.6.0"
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.clusterdistance" do
  # Calls Cluster.clusterdistance for two index lists with the option set
  # used throughout this spec (:dist => 'e', :method => 'a', no transpose).
  def distance_between(data, first, second, mask, weight)
    Cluster.clusterdistance data, first, second, :mask      => mask,
                                                 :weight    => weight,
                                                 :dist      => 'e',
                                                 :method    => 'a',
                                                 :transpose => false
  end

  it "calculates distances for data set 1" do
    unit_weights = [1] * 5
    rows = [[ 1.1, 2.2, 3.3, 4.4, 5.5],
            [ 3.1, 3.2, 1.3, 2.4, 1.5],
            [ 4.1, 2.2, 0.3, 5.4, 0.5],
            [12.1, 2.0, 0.0, 5.0, 0.0]]

    # Every entry is flagged with 1 in the mask.
    all_ones = Array.new(rows.size) { [1] * 5 }

    # Cluster assignments
    first  = [0]
    second = [1, 2]
    third  = [3]

    [[first,  second,  6.650],
     [first,  third,  32.508],
     [second, third,  15.118]].each do |a, b, expected|
      distance = distance_between(rows, a, b, all_ones, unit_weights)
      distance.should be_within(0.001).of(expected)
    end
  end

  it "calculates distances for data set 2" do
    unit_weights = [1] * 2
    rows = [[ 1.1, 1.2 ],
            [ 1.4, 1.3 ],
            [ 1.1, 1.5 ],
            [ 2.0, 1.5 ],
            [ 1.7, 1.9 ],
            [ 1.7, 1.9 ],
            [ 5.7, 5.9 ],
            [ 5.7, 5.9 ],
            [ 3.1, 3.3 ],
            [ 5.4, 5.3 ],
            [ 5.1, 5.5 ],
            [ 5.0, 5.5 ],
            [ 5.1, 5.2 ]]

    # Every entry is flagged with 1 in the mask.
    all_ones = Array.new(rows.size) { [1] * 2 }

    # Cluster assignments
    first  = [ 0, 1, 2, 3 ]
    second = [ 4, 5, 6, 7 ]
    third  = [ 8 ]

    [[first,  second, 5.833],
     [first,  third,  3.298],
     [second, third,  0.360]].each do |a, b, expected|
      distance = distance_between(rows, a, b, all_ones, unit_weights)
      distance.should be_within(0.001).of(expected)
    end
  end
end
|
106
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.kcluster" do
  # Runs kcluster with the explicit option set shared by the first two
  # examples and returns only the cluster ids.
  def cluster_ids_for(data, mask, weight, nclusters)
    ids, _error, _nfound = Cluster.kcluster data, :clusters  => nclusters,
                                                  :mask      => mask,
                                                  :weight    => weight,
                                                  :transpose => false,
                                                  :passes    => 100,
                                                  :method    => 'a',
                                                  :dist      => 'e'
    ids
  end

  # Asserts that +clusterids+ groups the items the same way as +correct+,
  # whatever numbers the clusters were given: each label in +correct+ is
  # mapped to the id found at that label's first occurrence.
  def should_match_grouping(clusterids, correct, nclusters)
    mapping = (0...nclusters).map { |label| clusterids[correct.index(label)] }
    clusterids.each_with_index do |actual, position|
      actual.should == mapping[correct[position]]
    end
  end

  it "should run kcluster for the given data" do
    cluster_count = 3
    # First data set
    rows = [[ 1.1, 2.2, 3.3, 4.4, 5.5],
            [ 3.1, 3.2, 1.3, 2.4, 1.5],
            [ 4.1, 2.2, 0.3, 5.4, 0.5],
            [12.1, 2.0, 0.0, 5.0, 0.0]]
    all_ones     = Array.new(rows.size) { [1] * 5 }
    unit_weights = [1] * 5

    clusterids = cluster_ids_for(rows, all_ones, unit_weights, cluster_count)

    clusterids.size.should == rows.size
    should_match_grouping(clusterids, [0, 1, 1, 2], cluster_count)
  end

  it "should run kcluster for a second set of data" do
    cluster_count = 3
    rows = [[ 1.1, 1.2 ],
            [ 1.4, 1.3 ],
            [ 1.1, 1.5 ],
            [ 2.0, 1.5 ],
            [ 1.7, 1.9 ],
            [ 1.7, 1.9 ],
            [ 5.7, 5.9 ],
            [ 5.7, 5.9 ],
            [ 3.1, 3.3 ],
            [ 5.4, 5.3 ],
            [ 5.1, 5.5 ],
            [ 5.0, 5.5 ],
            [ 5.1, 5.2 ]]
    all_ones     = Array.new(rows.size) { [1] * 2 }
    unit_weights = [1] * 2

    clusterids = cluster_ids_for(rows, all_ones, unit_weights, cluster_count)

    clusterids.size.should == rows.size
    should_match_grouping(clusterids,
                          [0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1],
                          cluster_count)
  end

  it "raises ArgumentError if passed inconsistent data" do
    # Second row has one more column than the first.
    ragged = [[1,2,3], [1,2,3,4]]

    lambda {
      Cluster.kcluster(ragged, {})
    }.should raise_error(ArgumentError, "expected 3 columns, row has 4")
  end

  it "will use default options" do
    rows = [[1,1,1], [10,10,0], [0,0,0]]
    clusterids, _error, _nfound = Cluster.kcluster(rows, :passes => 1000)

    clusterids.should be_kind_of(Array)
    # Either numbering of the two groups is accepted.
    [[0, 1, 0], [1, 0, 1]].should include(clusterids)
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.kmedoids" do
  it "should calculate kmedoids from a distance matrix" do
    rows = [[2.2, 3.3, 4.4],
            [2.1, 1.4, 5.6],
            [7.8, 9.0, 1.2],
            [4.5, 2.3, 1.5],
            [4.2, 2.4, 1.9],
            [3.6, 3.1, 9.3],
            [2.3, 1.2, 3.9],
            [4.2, 9.6, 9.3],
            [1.7, 8.9, 1.1]]

    presence = [[1, 1, 1],
                [1, 1, 1],
                [0, 1, 1],
                [1, 1, 1],
                [1, 1, 1],
                [0, 1, 0],
                [1, 1, 1],
                [1, 0, 1],
                [1, 1, 1]]

    column_weights = [2.0, 1.0, 0.5]
    matrix = Cluster.distancematrix rows, :mask => presence, :weight => column_weights

    # Expected matrix[i][j] for j < i; row 0 has no entries to check.
    expected_lower_triangle = [
      [],
      [ 1.243],
      [25.073, 44.960],
      [ 4.510,  5.924, 29.957],
      [ 3.410,  4.761, 29.203,  0.077],
      [ 0.040,  2.890, 34.810,  0.640,  0.490],
      [ 1.301,  0.447, 42.990,  3.934,  3.046,  3.610],
      [ 8.002,  6.266, 65.610, 12.240, 10.952,  0.000,  8.720],
      [10.659, 19.056,  0.010, 16.949, 15.734, 33.640, 18.266, 18.448]
    ]

    expected_lower_triangle.each_with_index do |expected_row, i|
      expected_row.each_with_index do |expected, j|
        matrix[i][j].should be_within(0.001).of(expected)
      end
    end

    clusterid, error, _nfound = Cluster.kmedoids matrix, :passes => 1000

    # Expected cluster id for each of the nine items, checked by position.
    [5, 5, 2, 5, 5, 5, 5, 5, 2].each_with_index do |expected_id, item|
      clusterid[item].should == expected_id
    end

    error.should be_within(0.001).of(7.680)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.{median,mean}" do
  # Four input rows of differing length, mixing integers and floats.
  let(:data) do
    [
      [ 34.3, 3, 2 ],
      [ 5, 10, 15, 20],
      [ 1, 2, 3, 5, 7, 11, 13, 17],
      [ 100, 19, 3, 1.5, 1.4, 1, 1, 1],
    ]
  end

  it "calculates the median" do
    # Medians are compared exactly, one expected value per data row.
    [3.0, 12.5, 6.0, 1.45].each_with_index do |expected, row|
      Cluster.median(data[row]).should == expected
    end
  end

  it "calculates the mean" do
    # Means are compared with a tolerance of 0.001.
    [13.1, 12.5, 7.375, 15.988].each_with_index do |expected, row|
      Cluster.mean(data[row]).should be_within(0.001).of(expected)
    end
  end
end
|
data/spec/node_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Cluster
  describe Node do
    it "creates a new node with left/right" do
      n = Node.new(2, 3)
      n.left.should == 2
      n.right.should == 3
    end

    it "takes an optional distance" do
      n = Node.new(2, 3, 0.91)

      n.left.should == 2
      n.right.should == 3
      n.distance.should == 0.91
    end

    it "is mutable" do
      n = Node.new(2, 3, 0.91)

      n.left = 4
      n.right = 5
      n.distance = 2.1

      # Read the values back: without these checks the example would pass
      # even if the setters silently ignored their argument.
      n.left.should == 4
      n.right.should == 5
      n.distance.should == 2.1
    end
  end
end
|
data/spec/pca_spec.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.pca" do
  # Checks actual[i] against each listed expected value (tolerance 0.001).
  def should_be_close_vector(actual, expected)
    expected.each_with_index do |value, i|
      actual[i].should be_within(0.001).of(value)
    end
  end

  # Checks actual[i][j] against each listed expected value (tolerance 0.001).
  # Only the entries present in +expected+ are inspected.
  def should_be_close_matrix(actual, expected)
    expected.each_with_index do |expected_row, i|
      expected_row.each_with_index do |value, j|
        actual[i][j].should be_within(0.001).of(value)
      end
    end
  end

  it "performs principal component analysis where nrows > ncols" do
    rows = [
      [ 3.1, 1.2 ],
      [ 1.4, 1.3 ],
      [ 1.1, 1.5 ],
      [ 2.0, 1.5 ],
      [ 1.7, 1.9 ],
      [ 1.7, 1.9 ],
      [ 5.7, 5.9 ],
      [ 5.7, 5.9 ],
      [ 3.1, 3.3 ],
      [ 5.4, 5.3 ],
      [ 5.1, 5.5 ],
      [ 5.0, 5.5 ],
      [ 5.1, 5.2 ],
    ]

    mean, coordinates, pc, eigenvalues = Cluster.pca(rows)

    should_be_close_vector(mean, [3.5461538461538464, 3.5307692307692311])

    should_be_close_matrix(coordinates, [
      [ 2.0323189722653883,   1.2252420399694917],
      [ 3.0936985166252251,  -0.10647619705157851],
      [ 3.1453186907749426,  -0.46331699855941139],
      [ 2.5440202962223761,   0.20633980959571077],
      [ 2.4468278463376221,  -0.28412285736824866],
      [ 2.4468278463376221,  -0.28412285736824866],
      [-3.2018619434743254,   0.019692314198662915],
      [-3.2018619434743254,   0.019692314198662915],
      [ 0.46978641990344067, -0.17778754731982949],
      [-2.5549912731867215,   0.19733897451533403],
      [-2.5033710990370044,  -0.15950182699250004],
      [-2.4365601663089413,  -0.23390813900973562],
      [-2.2801521629852974,   0.0409309711916888],
    ])

    should_be_close_matrix(pc, [
      [-0.66810932728062988, -0.74406312017235743],
      [ 0.74406312017235743, -0.66810932728062988],
    ])

    should_be_close_vector(eigenvalues, [9.3110471246032844, 1.4437456297481428])
  end

  it "performs principal component analysis where ncols > nrows" do
    rows = [[ 2.3, 4.5, 1.2, 6.7, 5.3, 7.1],
            [ 1.3, 6.5, 2.2, 5.7, 6.2, 9.1],
            [ 3.2, 7.2, 3.2, 7.4, 7.3, 8.9],
            [ 4.2, 5.2, 9.2, 4.4, 6.3, 7.2]]

    mean, coordinates, pc, eigenvalues = Cluster.pca(rows)

    should_be_close_vector(mean, [2.7500, 5.8500, 3.9500, 6.0500, 6.2750, 8.0750])

    should_be_close_matrix(coordinates, [
      [ 2.6460846688406905, -2.1421701432732418,  -0.56620932754145858,  0.0],
      [ 2.0644120899917544,  0.55542108669180323,  1.4818772348457117,   0.0],
      [ 1.0686641862092987,  1.9994412069101073,  -1.000720598980291,    0.0],
      [-5.77916094504174,   -0.41269215032867046,  0.085052691676038017, 0.0],
    ])

    # As the last eigenvalue is zero, the corresponding eigenvector is
    # strongly affected by roundoff error, and is not being tested here.
    # For PCA, this doesn't matter since all data have a zero coefficient
    # along this eigenvector.
    should_be_close_matrix(pc, [
      [-0.26379660005997291, 0.064814972617134495, -0.91763310094893846,   0.26145408875373249],
      [ 0.05073770520434398, 0.68616983388698793,   0.13819106187213354,   0.19782544121828985],
      [-0.63000893660095947, 0.091155993862151397,  0.045630391256086845, -0.67456694780914772],
    ])

    should_be_close_vector(eigenvalues, [6.7678878332578778,
                                         3.0108911400291856,
                                         1.8775592718563467,
                                         0.0])
  end
end
|
113
|
+
|