rbcluster 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/.travis.yml +6 -0
- data/Gemfile +4 -0
- data/LICENSE +29 -0
- data/README.md +54 -0
- data/Rakefile +17 -0
- data/examples/simple_kcluster.rb +10 -0
- data/ext/rbcluster/cluster.c +4598 -0
- data/ext/rbcluster/cluster.h +93 -0
- data/ext/rbcluster/extconf.rb +6 -0
- data/ext/rbcluster/rbcluster.c +775 -0
- data/lib/rbcluster.rb +5 -0
- data/lib/rbcluster/tree.rb +20 -0
- data/lib/rbcluster/version.rb +3 -0
- data/rbcluster.gemspec +24 -0
- data/spec/clustercentroids_spec.rb +6 -0
- data/spec/clusterdistance_spec.rb +106 -0
- data/spec/clustermedoids_spec.rb +6 -0
- data/spec/cuttree_spec.rb +6 -0
- data/spec/kcluster_spec.rb +95 -0
- data/spec/kmedoids_spec.rb +86 -0
- data/spec/median_mean_spec.rb +26 -0
- data/spec/node_spec.rb +27 -0
- data/spec/pca_spec.rb +113 -0
- data/spec/somcluster_spec.rb +81 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/treecluster_spec.rb +412 -0
- metadata +110 -0
data/lib/rbcluster.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Cluster
  # Wraps a list of Node objects and validates the list on construction.
  class Tree
    # Builds a tree from +nodes+.
    #
    # @param nodes [#each_with_index] collection whose elements must all be
    #   instances of Node (or of a subclass); it is stored as given, not copied
    # @raise [ArgumentError] if any element is not a Node; the message names
    #   the offending element's class and its index
    def initialize(nodes)
      nodes.each_with_index do |node, idx|
        next if node.kind_of?(Node)

        # Interpolate the constant itself: Node.class would print "Class".
        raise ArgumentError, "expected #{Node}, got #{node.class} at index #{idx}"
      end

      @nodes = nodes
    end

    # @return [Integer] number of nodes the tree was built from
    def size
      @nodes.size
    end
  end
end
|
data/rbcluster.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "rbcluster/version"
|
4
|
+
|
5
|
+
# Gem specification for rbcluster.
#
# The file, test-file and extension lists are read from `git ls-files`, so the
# gem has to be built from a git checkout with `git` available on the PATH.
Gem::Specification.new do |s|
  s.name        = "rbcluster"
  s.version     = Cluster::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jari Bakken", "Michiel Jan Laurens de Hoon"]
  s.email       = ["jari.bakken@gmail.com"]
  s.homepage    = "http://bonsai.hgc.jp/~mdehoon/software/cluster/software.htm"
  s.summary     = %q{Ruby bindings for the Cluster C library}
  s.description = %q{This gem provides a Ruby extension to the clustering routines in the C Clustering Library (which also backs e.g. Python's pycluster and Perl's Algorithm::Cluster).}

  # NOTE: `s.rubyforge_project` was dropped here; the attribute is deprecated
  # in current RubyGems and only produced a warning at build time.

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- spec/*`.split("\n")
  s.require_paths = ["lib"]
  s.extensions    = `git ls-files -- ext/**/extconf.rb`.split("\n")

  s.add_development_dependency "rake-compiler"
  s.add_development_dependency "rspec", "~> 2.6.0"
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.clusterdistance" do
  # Calls Cluster.clusterdistance for the two index lists +first+ and +second+
  # with the option set that every example in this file shares.
  def distance_between(rows, mask, weight, first, second)
    Cluster.clusterdistance rows, first, second, :mask      => mask,
                                                 :weight    => weight,
                                                 :dist      => 'e',
                                                 :method    => 'a',
                                                 :transpose => false
  end

  it "calculates distances for data set 1" do
    weight = [1, 1, 1, 1, 1]
    rows   = [[  1.1, 2.2, 3.3, 4.4, 5.5 ],
              [  3.1, 3.2, 1.3, 2.4, 1.5 ],
              [  4.1, 2.2, 0.3, 5.4, 0.5 ],
              [ 12.1, 2.0, 0.0, 5.0, 0.0 ]]

    # Nothing is masked out: one all-ones row per data row.
    mask = Array.new(rows.size) { [1, 1, 1, 1, 1] }

    # Cluster assignments
    first_cluster  = [0]
    second_cluster = [1, 2]
    third_cluster  = [3]

    [
      [first_cluster,  second_cluster,  6.650],
      [first_cluster,  third_cluster,  32.508],
      [second_cluster, third_cluster,  15.118],
    ].each do |a, b, expected|
      distance = distance_between(rows, mask, weight, a, b)
      distance.should be_within(0.001).of(expected)
    end
  end

  it "calculates distances for data set 2" do
    weight = [1, 1]
    rows   = [[ 1.1, 1.2 ],
              [ 1.4, 1.3 ],
              [ 1.1, 1.5 ],
              [ 2.0, 1.5 ],
              [ 1.7, 1.9 ],
              [ 1.7, 1.9 ],
              [ 5.7, 5.9 ],
              [ 5.7, 5.9 ],
              [ 3.1, 3.3 ],
              [ 5.4, 5.3 ],
              [ 5.1, 5.5 ],
              [ 5.0, 5.5 ],
              [ 5.1, 5.2 ]]

    # Nothing is masked out: one all-ones row per data row.
    mask = Array.new(rows.size) { [1, 1] }

    # Cluster assignments
    first_cluster  = [0, 1, 2, 3]
    second_cluster = [4, 5, 6, 7]
    third_cluster  = [8]

    [
      [first_cluster,  second_cluster, 5.833],
      [first_cluster,  third_cluster,  3.298],
      [second_cluster, third_cluster,  0.360],
    ].each do |a, b, expected|
      distance = distance_between(rows, mask, weight, a, b)
      distance.should be_within(0.001).of(expected)
    end
  end
end
|
106
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.kcluster" do
  # Runs Cluster.kcluster with the option set shared by the first two examples.
  def run_kcluster(data, mask, weight, nclusters)
    Cluster.kcluster data, :clusters  => nclusters,
                           :mask      => mask,
                           :weight    => weight,
                           :transpose => false,
                           :passes    => 100,
                           :method    => 'a',
                           :dist      => 'e'
  end

  # Compares +clusterids+ with the +correct+ partition without depending on
  # how the clusters are numbered: each reference label is mapped to the id
  # returned for the first row carrying that label.
  def check_partition(clusterids, correct, nclusters)
    mapping = (0...nclusters).map { |label| clusterids[correct.index(label)] }
    clusterids.each_with_index do |id, row|
      id.should == mapping[correct[row]]
    end
  end

  it "should run kcluster for the given data" do
    nclusters = 3

    # First data set
    weight = [1, 1, 1, 1, 1]
    data   = [[  1.1, 2.2, 3.3, 4.4, 5.5 ],
              [  3.1, 3.2, 1.3, 2.4, 1.5 ],
              [  4.1, 2.2, 0.3, 5.4, 0.5 ],
              [ 12.1, 2.0, 0.0, 5.0, 0.0 ]]
    mask   = Array.new(data.size) { [1, 1, 1, 1, 1] }

    clusterids, _error, _nfound = run_kcluster(data, mask, weight, nclusters)

    clusterids.size.should == data.size
    check_partition(clusterids, [0, 1, 1, 2], nclusters)
  end

  it "should run kcluster for a second set of data" do
    nclusters = 3
    weight    = [1, 1]
    data      = [[ 1.1, 1.2 ],
                 [ 1.4, 1.3 ],
                 [ 1.1, 1.5 ],
                 [ 2.0, 1.5 ],
                 [ 1.7, 1.9 ],
                 [ 1.7, 1.9 ],
                 [ 5.7, 5.9 ],
                 [ 5.7, 5.9 ],
                 [ 3.1, 3.3 ],
                 [ 5.4, 5.3 ],
                 [ 5.1, 5.5 ],
                 [ 5.0, 5.5 ],
                 [ 5.1, 5.2 ]]
    mask      = Array.new(data.size) { [1, 1] }

    clusterids, _error, _nfound = run_kcluster(data, mask, weight, nclusters)

    clusterids.size.should == data.size
    check_partition(clusterids, [0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1], nclusters)
  end

  it "raises ArgumentError if passed inconsistent data" do
    ragged_call = lambda do
      Cluster.kcluster [[1,2,3], [1,2,3,4]], {}
    end

    ragged_call.should raise_error(ArgumentError, "expected 3 columns, row has 4")
  end

  it "will use default options" do
    data = [[1,1,1], [10,10,0], [0,0,0]]
    clusterids, _error, _nfound = Cluster.kcluster(data, :passes => 1000)

    clusterids.should be_kind_of(Array)

    # Either numbering of the two clusters is acceptable.
    acceptable = [[0, 1, 0], [1, 0, 1]]
    acceptable.should include(clusterids)
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.kmedoids" do
  it "should calculate kmedoids from a distance matrix" do
    data = [[2.2, 3.3, 4.4],
            [2.1, 1.4, 5.6],
            [7.8, 9.0, 1.2],
            [4.5, 2.3, 1.5],
            [4.2, 2.4, 1.9],
            [3.6, 3.1, 9.3],
            [2.3, 1.2, 3.9],
            [4.2, 9.6, 9.3],
            [1.7, 8.9, 1.1]]

    mask = [[1, 1, 1],
            [1, 1, 1],
            [0, 1, 1],
            [1, 1, 1],
            [1, 1, 1],
            [0, 1, 0],
            [1, 1, 1],
            [1, 0, 1],
            [1, 1, 1]]

    weight = [2.0, 1.0, 0.5]
    matrix = Cluster.distancematrix data, :mask => mask, :weight => weight

    # Expected entries below the diagonal; the first list belongs to
    # matrix[1], the second to matrix[2], and so on.
    lower_triangle = [
      [ 1.243],
      [25.073, 44.960],
      [ 4.510,  5.924, 29.957],
      [ 3.410,  4.761, 29.203,  0.077],
      [ 0.040,  2.890, 34.810,  0.640,  0.490],
      [ 1.301,  0.447, 42.990,  3.934,  3.046,  3.610],
      [ 8.002,  6.266, 65.610, 12.240, 10.952,  0.000,  8.720],
      [10.659, 19.056,  0.010, 16.949, 15.734, 33.640, 18.266, 18.448],
    ]

    lower_triangle.each_with_index do |expected_row, offset|
      row = offset + 1
      expected_row.each_with_index do |expected, col|
        matrix[row][col].should be_within(0.001).of(expected)
      end
    end

    clusterid, error, _nfound = Cluster.kmedoids matrix, :passes => 1000

    [5, 5, 2, 5, 5, 5, 5, 5, 2].each_with_index do |expected_id, item|
      clusterid[item].should == expected_id
    end

    error.should be_within(0.001).of(7.680)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.{median,mean}" do
  # Four number lists of different lengths, mixing Integer and Float elements.
  let(:data) {
    [
      [ 34.3, 3, 2 ],
      [ 5, 10, 15, 20],
      [ 1, 2, 3, 5, 7, 11, 13, 17],
      [ 100, 19, 3, 1.5, 1.4, 1, 1, 1],
    ]
  }

  # Results are floats, so compare with a tolerance (as the mean example
  # does) instead of exact equality.
  it "calculates the median" do
    Cluster.median(data[0]).should be_within(0.001).of(3.0)
    Cluster.median(data[1]).should be_within(0.001).of(12.5)
    Cluster.median(data[2]).should be_within(0.001).of(6.0)
    Cluster.median(data[3]).should be_within(0.001).of(1.45)
  end

  it "calculates the mean" do
    Cluster.mean(data[0]).should be_within(0.001).of(13.1)
    Cluster.mean(data[1]).should be_within(0.001).of(12.5)
    Cluster.mean(data[2]).should be_within(0.001).of(7.375)
    Cluster.mean(data[3]).should be_within(0.001).of(15.988)
  end
end
|
data/spec/node_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Cluster
  describe Node do
    it "creates a new node with left/right" do
      n = Node.new(2, 3)
      n.left.should == 2
      n.right.should == 3
    end

    it "takes an optional distance" do
      n = Node.new(2, 3, 0.91)

      n.left.should == 2
      n.right.should == 3
      n.distance.should == 0.91
    end

    it "is mutable" do
      n = Node.new(2, 3, 0.91)

      n.left = 4
      n.right = 5
      n.distance = 2.1

      # Read the values back; without these checks the example could never
      # fail on a setter that silently does nothing.
      n.left.should == 4
      n.right.should == 5
      n.distance.should == 2.1
    end
  end
end
|
data/spec/pca_spec.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "Cluster.pca" do
  # Asserts that the leading entries of +actual+ match +expected+ element by
  # element, each within 0.001.
  def check_vector(actual, expected)
    expected.each_with_index do |value, i|
      actual[i].should be_within(0.001).of(value)
    end
  end

  # Row-by-row version of check_vector; only the rows and columns listed in
  # +expected+ are inspected.
  def check_matrix(actual, expected)
    expected.each_with_index do |expected_row, i|
      expected_row.each_with_index do |value, j|
        actual[i][j].should be_within(0.001).of(value)
      end
    end
  end

  it "performs principal component analysis where nrows > ncols" do
    data = [
      [ 3.1, 1.2 ],
      [ 1.4, 1.3 ],
      [ 1.1, 1.5 ],
      [ 2.0, 1.5 ],
      [ 1.7, 1.9 ],
      [ 1.7, 1.9 ],
      [ 5.7, 5.9 ],
      [ 5.7, 5.9 ],
      [ 3.1, 3.3 ],
      [ 5.4, 5.3 ],
      [ 5.1, 5.5 ],
      [ 5.0, 5.5 ],
      [ 5.1, 5.2 ],
    ]

    mean, coordinates, pc, eigenvalues = Cluster.pca(data)

    check_vector mean, [3.5461538461538464, 3.5307692307692311]

    check_matrix coordinates, [
      [ 2.0323189722653883,   1.2252420399694917  ],
      [ 3.0936985166252251,  -0.10647619705157851 ],
      [ 3.1453186907749426,  -0.46331699855941139 ],
      [ 2.5440202962223761,   0.20633980959571077 ],
      [ 2.4468278463376221,  -0.28412285736824866 ],
      [ 2.4468278463376221,  -0.28412285736824866 ],
      [-3.2018619434743254,   0.019692314198662915],
      [-3.2018619434743254,   0.019692314198662915],
      [ 0.46978641990344067, -0.17778754731982949 ],
      [-2.5549912731867215,   0.19733897451533403 ],
      [-2.5033710990370044,  -0.15950182699250004 ],
      [-2.4365601663089413,  -0.23390813900973562 ],
      [-2.2801521629852974,   0.0409309711916888  ],
    ]

    check_matrix pc, [
      [-0.66810932728062988, -0.74406312017235743],
      [ 0.74406312017235743, -0.66810932728062988],
    ]

    check_vector eigenvalues, [9.3110471246032844, 1.4437456297481428]
  end

  it "performs principal component analysis where ncols > nrows" do
    data = [[ 2.3, 4.5, 1.2, 6.7, 5.3, 7.1],
            [ 1.3, 6.5, 2.2, 5.7, 6.2, 9.1],
            [ 3.2, 7.2, 3.2, 7.4, 7.3, 8.9],
            [ 4.2, 5.2, 9.2, 4.4, 6.3, 7.2]]

    mean, coordinates, pc, eigenvalues = Cluster.pca(data)

    check_vector mean, [2.7500, 5.8500, 3.9500, 6.0500, 6.2750, 8.0750]

    check_matrix coordinates, [
      [ 2.6460846688406905, -2.1421701432732418,  -0.56620932754145858,  0.0],
      [ 2.0644120899917544,  0.55542108669180323,  1.4818772348457117,   0.0],
      [ 1.0686641862092987,  1.9994412069101073,  -1.000720598980291,    0.0],
      [-5.77916094504174,   -0.41269215032867046,  0.085052691676038017, 0.0],
    ]

    # Only the first three rows of pc are checked: the last eigenvalue is
    # zero, so the corresponding eigenvector is strongly affected by roundoff
    # error and is not being tested here. For PCA this doesn't matter, since
    # all data have a zero coefficient along this eigenvector.
    check_matrix pc, [
      [-0.26379660005997291, 0.064814972617134495, -0.91763310094893846,   0.26145408875373249],
      [ 0.05073770520434398, 0.68616983388698793,   0.13819106187213354,   0.19782544121828985],
      [-0.63000893660095947, 0.091155993862151397,  0.045630391256086845, -0.67456694780914772],
    ]

    check_vector eigenvalues, [6.7678878332578778, 3.0108911400291856, 1.8775592718563467, 0.0]
  end
end
|
113
|
+
|