Empact-hierclust 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Empact-hierclust.gemspec +2 -2
- data/History.txt +6 -0
- data/VERSION +1 -1
- data/lib/hierclust/cluster.rb +2 -2
- data/lib/hierclust/clusterer.rb +6 -5
- data/lib/hierclust/distances.rb +2 -2
- data/lib/hierclust/point.rb +11 -4
- data/spec/hierclust/cluster_spec.rb +64 -0
- data/spec/hierclust/clusterer_spec.rb +34 -5
- metadata +4 -4
data/Empact-hierclust.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{Empact-hierclust}
|
8
|
-
s.version = "0.2.1"
|
8
|
+
s.version = "0.2.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brandt Kurowski", "Ben Woosley"]
|
12
|
-
s.date = %q{2010-11-
|
12
|
+
s.date = %q{2010-11-02}
|
13
13
|
s.description = %q{performs hierarchical clustering on points in Euclidian space}
|
14
14
|
s.email = %q{ben.woosley@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/History.txt
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.1
|
1
|
+
0.2.2
|
data/lib/hierclust/cluster.rb
CHANGED
@@ -13,10 +13,10 @@ module Hierclust
|
|
13
13
|
end
|
14
14
|
|
15
15
|
# Returns the average coordinates of all items in this Cluster.
|
16
|
-
def coordinates
|
16
|
+
def coordinates(nils = nil)
|
17
17
|
return nil if size == 0
|
18
18
|
@coordinates ||= begin
|
19
|
-
coords = self.points.map {|p| p.coordinates }
|
19
|
+
coords = self.points.map {|p| p.coordinates(nils) }
|
20
20
|
coords = coords.shift.zip(*coords)
|
21
21
|
coords.map {|points| points.inject(0.0) {|sum, p| sum + p } / points.size }
|
22
22
|
end
|
data/lib/hierclust/clusterer.rb
CHANGED
@@ -16,18 +16,19 @@ module Hierclust
|
|
16
16
|
# clustered, but will be put into clusters based strictly on coordinates.
|
17
17
|
# The clusters generated by this "pre-clustering" will then be
|
18
18
|
# hierarchically clustered as normal.
|
19
|
-
def initialize(data, separation = nil, resolution = nil)
|
20
|
-
@separation = separation
|
21
|
-
@resolution = resolution
|
19
|
+
def initialize(data, options = {})
|
20
|
+
@separation = options.delete(:separation)
|
21
|
+
@resolution = options.delete(:resolution)
|
22
|
+
@nils = options.delete(:nils)
|
22
23
|
@data = precluster(data)
|
23
|
-
@distances = Distances.new(@data)
|
24
|
+
@distances = Distances.new(@data, @nils)
|
24
25
|
end
|
25
26
|
|
26
27
|
# Calculates and returns the set of clusters.
|
27
28
|
def clusters
|
28
29
|
return @data if @separation && @distances.separation > @separation
|
29
30
|
while @data.length > 1
|
30
|
-
@distances = Distances.new(@data)
|
31
|
+
@distances = Distances.new(@data, @nils)
|
31
32
|
return @data if @separation && @distances.separation > @separation
|
32
33
|
@data = find_cluster
|
33
34
|
end
|
data/lib/hierclust/distances.rb
CHANGED
@@ -4,7 +4,7 @@ module Hierclust
|
|
4
4
|
attr_reader :nearest, :outliers, :separation
|
5
5
|
|
6
6
|
# Create a new Distances for the given +items+
|
7
|
-
def initialize(items)
|
7
|
+
def initialize(items, nils = nil)
|
8
8
|
@items = items
|
9
9
|
@separation = 0
|
10
10
|
@nearest = []
|
@@ -12,7 +12,7 @@ module Hierclust
|
|
12
12
|
while !items.empty?
|
13
13
|
origin = items.shift
|
14
14
|
items.each do |other|
|
15
|
-
distance = origin.distance_to(other)
|
15
|
+
distance = origin.distance_to(other, nils)
|
16
16
|
if @separation == 0 or distance < @separation
|
17
17
|
@separation = distance
|
18
18
|
@nearest = [origin, other]
|
data/lib/hierclust/point.rb
CHANGED
@@ -2,18 +2,25 @@ module Hierclust
|
|
2
2
|
# A Point represents a single point in n-dimensional space.
|
3
3
|
class Point
|
4
4
|
# x-coordinate
|
5
|
-
attr_accessor :coordinates
|
6
5
|
attr_accessor :data
|
7
6
|
|
8
7
|
# Create a new Point with the given coordinates.
|
9
8
|
def initialize(*coordinates)
|
10
9
|
@data = coordinates.last.is_a?(Hash) ? coordinates.pop : {}
|
11
|
-
@coordinates = coordinates
|
10
|
+
@coordinates = coordinates.flatten
|
12
11
|
end
|
13
12
|
|
13
|
+
def coordinates(nils = nil)
|
14
|
+
if nils
|
15
|
+
@coordinates.map {|c| c || nils }
|
16
|
+
else
|
17
|
+
@coordinates
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
14
21
|
# Returns this distance from this Point to an +other+ Point.
|
15
|
-
def distance_to(other)
|
16
|
-
sum_of_squares = coordinates.zip(other.coordinates).map do |point, other_point|
|
22
|
+
def distance_to(other, nils = nil)
|
23
|
+
sum_of_squares = coordinates(nils).zip(other.coordinates(nils)).map do |point, other_point|
|
17
24
|
(other_point - point) ** 2
|
18
25
|
end.inject(0) {|sum, distance| sum + distance }
|
19
26
|
Math.sqrt(sum_of_squares)
|
@@ -64,6 +64,70 @@ module Hierclust
|
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
67
|
+
describe Cluster, " with two points and data" do
|
68
|
+
before do
|
69
|
+
@x_1, @x_2 = 5, 15
|
70
|
+
@y_1, @y_2 = 4, 8
|
71
|
+
@p_1 = Point.new(@x_1, @y_1, :name => 'foo')
|
72
|
+
@p_2 = Point.new(@x_2, @y_2, :name => 'bar')
|
73
|
+
@c = Cluster.new([@p_1, @p_2])
|
74
|
+
@points = @c.points
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should have coordinates at the average of points' coordinates" do
|
78
|
+
@c.coordinates.should == [10, 6]
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have two points" do
|
82
|
+
@points.size.should == 2
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should include both points" do
|
86
|
+
@points.should include(@p_1, @p_2)
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should retain the data on the points" do
|
90
|
+
@points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should have correct radius" do
|
94
|
+
radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
|
95
|
+
@c.radius.should == radius
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe Cluster, " with an array of two points and data" do
|
100
|
+
before do
|
101
|
+
@x_1, @x_2 = 5, 15
|
102
|
+
@y_1, @y_2 = 4, 8
|
103
|
+
@p_1 = Point.new([@x_1, @y_1], :name => 'foo')
|
104
|
+
@p_2 = Point.new([@x_2, @y_2], :name => 'bar')
|
105
|
+
@c = Cluster.new([@p_1, @p_2])
|
106
|
+
@points = @c.points
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should have coordinates at the average of points' coordinates" do
|
110
|
+
@c.coordinates.should == [10, 6]
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should have two points" do
|
114
|
+
@points.size.should == 2
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should include both points" do
|
118
|
+
@points.should include(@p_1, @p_2)
|
119
|
+
end
|
120
|
+
|
121
|
+
it "should retain the data on the points" do
|
122
|
+
@points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should have correct radius" do
|
126
|
+
radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
|
127
|
+
@c.radius.should == radius
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
67
131
|
describe Cluster, " with one point and one cluster" do
|
68
132
|
before do
|
69
133
|
@x_1, @x_2, @x_3 = 1, 2, 3
|
@@ -110,7 +110,7 @@ module Hierclust
|
|
110
110
|
|
111
111
|
describe "and separation 1" do
|
112
112
|
before do
|
113
|
-
@c = Clusterer.new(@points, 1)
|
113
|
+
@c = Clusterer.new(@points, :separation => 1)
|
114
114
|
end
|
115
115
|
|
116
116
|
it "should return all four individual points" do
|
@@ -120,7 +120,7 @@ module Hierclust
|
|
120
120
|
|
121
121
|
describe "and separation 2" do
|
122
122
|
before do
|
123
|
-
@c = Clusterer.new(@points, 2)
|
123
|
+
@c = Clusterer.new(@points, :separation => 2)
|
124
124
|
end
|
125
125
|
|
126
126
|
it "should return two clusters" do
|
@@ -129,6 +129,35 @@ module Hierclust
|
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
132
|
+
describe "with missing values" do
|
133
|
+
before do
|
134
|
+
@points = [
|
135
|
+
Point.new(0, 1),
|
136
|
+
Point.new(1, 0),
|
137
|
+
Point.new(nil, 4),
|
138
|
+
Point.new(4, 3),
|
139
|
+
]
|
140
|
+
end
|
141
|
+
|
142
|
+
describe "and no stand-in" do
|
143
|
+
it "should raise and error" do
|
144
|
+
lambda {
|
145
|
+
Clusterer.new(@points)
|
146
|
+
}.should raise_error
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "with a stand-in" do
|
151
|
+
before do
|
152
|
+
@c = Clusterer.new(@points, :nils => 3, :separation => 1)
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should cluster as though the missing data were the stand-in" do
|
156
|
+
@c.clusters.size.should == 4
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
132
161
|
describe "with eight points" do
|
133
162
|
before do
|
134
163
|
@points = [
|
@@ -155,7 +184,7 @@ module Hierclust
|
|
155
184
|
|
156
185
|
describe "and separation 1" do
|
157
186
|
before do
|
158
|
-
@clusters = Clusterer.new(@points, 1).clusters.sort
|
187
|
+
@clusters = Clusterer.new(@points, :separation => 1).clusters.sort
|
159
188
|
end
|
160
189
|
|
161
190
|
it "should have all eight points in individual clusters" do
|
@@ -166,7 +195,7 @@ module Hierclust
|
|
166
195
|
describe "and separation 3" do
|
167
196
|
describe "with no resolution limit" do
|
168
197
|
before do
|
169
|
-
@clusters = Clusterer.new(@points, 3).clusters.sort
|
198
|
+
@clusters = Clusterer.new(@points, :separation => 3).clusters.sort
|
170
199
|
end
|
171
200
|
|
172
201
|
it "should have three clusters" do
|
@@ -186,7 +215,7 @@ module Hierclust
|
|
186
215
|
|
187
216
|
describe "with coarse resolution" do
|
188
217
|
before do
|
189
|
-
@clusters = Clusterer.new(@points, 3, 5).clusters.sort
|
218
|
+
@clusters = Clusterer.new(@points, :separation => 3, :resolution => 5).clusters.sort
|
190
219
|
end
|
191
220
|
|
192
221
|
it "should have three clusters" do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: Empact-hierclust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 1
|
10
|
-
version: 0.2.1
|
9
|
+
- 2
|
10
|
+
version: 0.2.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brandt Kurowski
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-11-
|
19
|
+
date: 2010-11-02 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|