Empact-hierclust 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Empact-hierclust.gemspec +2 -2
- data/History.txt +6 -0
- data/VERSION +1 -1
- data/lib/hierclust/cluster.rb +2 -2
- data/lib/hierclust/clusterer.rb +6 -5
- data/lib/hierclust/distances.rb +2 -2
- data/lib/hierclust/point.rb +11 -4
- data/spec/hierclust/cluster_spec.rb +64 -0
- data/spec/hierclust/clusterer_spec.rb +34 -5
- metadata +4 -4
data/Empact-hierclust.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{Empact-hierclust}
|
8
|
-
s.version = "0.2.1"
|
8
|
+
s.version = "0.2.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Brandt Kurowski", "Ben Woosley"]
|
12
|
-
s.date = %q{2010-11-
|
12
|
+
s.date = %q{2010-11-02}
|
13
13
|
s.description = %q{performs hierarchical clustering on points in Euclidian space}
|
14
14
|
s.email = %q{ben.woosley@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
data/History.txt
CHANGED
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.1
|
1
|
+
0.2.2
|
data/lib/hierclust/cluster.rb
CHANGED
@@ -13,10 +13,10 @@ module Hierclust
|
|
13
13
|
end
|
14
14
|
|
15
15
|
# Returns the average coordinates of all items in this Cluster.
|
16
|
-
def coordinates
|
16
|
+
def coordinates(nils = nil)
|
17
17
|
return nil if size == 0
|
18
18
|
@coordinates ||= begin
|
19
|
-
coords = self.points.map {|p| p.coordinates }
|
19
|
+
coords = self.points.map {|p| p.coordinates(nils) }
|
20
20
|
coords = coords.shift.zip(*coords)
|
21
21
|
coords.map {|points| points.inject(0.0) {|sum, p| sum + p } / points.size }
|
22
22
|
end
|
data/lib/hierclust/clusterer.rb
CHANGED
@@ -16,18 +16,19 @@ module Hierclust
|
|
16
16
|
# clustered, but will be put into clusters based strictly on coordinates.
|
17
17
|
# The clusters generated by this "pre-clustering" will then be
|
18
18
|
# hierarchically clustered as normal.
|
19
|
-
def initialize(data,
|
20
|
-
@separation = separation
|
21
|
-
@resolution = resolution
|
19
|
+
def initialize(data, options = {})
|
20
|
+
@separation = options.delete(:separation)
|
21
|
+
@resolution = options.delete(:resolution)
|
22
|
+
@nils = options.delete(:nils)
|
22
23
|
@data = precluster(data)
|
23
|
-
@distances = Distances.new(@data)
|
24
|
+
@distances = Distances.new(@data, @nils)
|
24
25
|
end
|
25
26
|
|
26
27
|
# Calculates and returns the set of clusters.
|
27
28
|
def clusters
|
28
29
|
return @data if @separation && @distances.separation > @separation
|
29
30
|
while @data.length > 1
|
30
|
-
@distances = Distances.new(@data)
|
31
|
+
@distances = Distances.new(@data, @nils)
|
31
32
|
return @data if @separation && @distances.separation > @separation
|
32
33
|
@data = find_cluster
|
33
34
|
end
|
data/lib/hierclust/distances.rb
CHANGED
@@ -4,7 +4,7 @@ module Hierclust
|
|
4
4
|
attr_reader :nearest, :outliers, :separation
|
5
5
|
|
6
6
|
# Create a new Distances for the given +items+
|
7
|
-
def initialize(items)
|
7
|
+
def initialize(items, nils = nil)
|
8
8
|
@items = items
|
9
9
|
@separation = 0
|
10
10
|
@nearest = []
|
@@ -12,7 +12,7 @@ module Hierclust
|
|
12
12
|
while !items.empty?
|
13
13
|
origin = items.shift
|
14
14
|
items.each do |other|
|
15
|
-
distance = origin.distance_to(other)
|
15
|
+
distance = origin.distance_to(other, nils)
|
16
16
|
if @separation == 0 or distance < @separation
|
17
17
|
@separation = distance
|
18
18
|
@nearest = [origin, other]
|
data/lib/hierclust/point.rb
CHANGED
@@ -2,18 +2,25 @@ module Hierclust
|
|
2
2
|
# A Point represents a single point in n-dimensional space.
|
3
3
|
class Point
|
4
4
|
# x-coordinate
|
5
|
-
attr_accessor :coordinates
|
6
5
|
attr_accessor :data
|
7
6
|
|
8
7
|
# Create a new Point with the given coordinates.
|
9
8
|
def initialize(*coordinates)
|
10
9
|
@data = coordinates.last.is_a?(Hash) ? coordinates.pop : {}
|
11
|
-
@coordinates = coordinates
|
10
|
+
@coordinates = coordinates.flatten
|
12
11
|
end
|
13
12
|
|
13
|
+
def coordinates(nils = nil)
|
14
|
+
if nils
|
15
|
+
@coordinates.map {|c| c || nils }
|
16
|
+
else
|
17
|
+
@coordinates
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
14
21
|
# Returns this distance from this Point to an +other+ Point.
|
15
|
-
def distance_to(other)
|
16
|
-
sum_of_squares = coordinates.zip(other.coordinates).map do |point, other_point|
|
22
|
+
def distance_to(other, nils = nil)
|
23
|
+
sum_of_squares = coordinates(nils).zip(other.coordinates(nils)).map do |point, other_point|
|
17
24
|
(other_point - point) ** 2
|
18
25
|
end.inject(0) {|sum, distance| sum + distance }
|
19
26
|
Math.sqrt(sum_of_squares)
|
@@ -64,6 +64,70 @@ module Hierclust
|
|
64
64
|
end
|
65
65
|
end
|
66
66
|
|
67
|
+
describe Cluster, " with two points and data" do
|
68
|
+
before do
|
69
|
+
@x_1, @x_2 = 5, 15
|
70
|
+
@y_1, @y_2 = 4, 8
|
71
|
+
@p_1 = Point.new(@x_1, @y_1, :name => 'foo')
|
72
|
+
@p_2 = Point.new(@x_2, @y_2, :name => 'bar')
|
73
|
+
@c = Cluster.new([@p_1, @p_2])
|
74
|
+
@points = @c.points
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should have coordinates at the average of points' coordinates" do
|
78
|
+
@c.coordinates.should == [10, 6]
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have two points" do
|
82
|
+
@points.size.should == 2
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should include both points" do
|
86
|
+
@points.should include(@p_1, @p_2)
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should retain the data on the points" do
|
90
|
+
@points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
|
91
|
+
end
|
92
|
+
|
93
|
+
it "should have correct radius" do
|
94
|
+
radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
|
95
|
+
@c.radius.should == radius
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe Cluster, " with an array of two points and data" do
|
100
|
+
before do
|
101
|
+
@x_1, @x_2 = 5, 15
|
102
|
+
@y_1, @y_2 = 4, 8
|
103
|
+
@p_1 = Point.new([@x_1, @y_1], :name => 'foo')
|
104
|
+
@p_2 = Point.new([@x_2, @y_2], :name => 'bar')
|
105
|
+
@c = Cluster.new([@p_1, @p_2])
|
106
|
+
@points = @c.points
|
107
|
+
end
|
108
|
+
|
109
|
+
it "should have coordinates at the average of points' coordinates" do
|
110
|
+
@c.coordinates.should == [10, 6]
|
111
|
+
end
|
112
|
+
|
113
|
+
it "should have two points" do
|
114
|
+
@points.size.should == 2
|
115
|
+
end
|
116
|
+
|
117
|
+
it "should include both points" do
|
118
|
+
@points.should include(@p_1, @p_2)
|
119
|
+
end
|
120
|
+
|
121
|
+
it "should retain the data on the points" do
|
122
|
+
@points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
|
123
|
+
end
|
124
|
+
|
125
|
+
it "should have correct radius" do
|
126
|
+
radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
|
127
|
+
@c.radius.should == radius
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
67
131
|
describe Cluster, " with one point and one cluster" do
|
68
132
|
before do
|
69
133
|
@x_1, @x_2, @x_3 = 1, 2, 3
|
@@ -110,7 +110,7 @@ module Hierclust
|
|
110
110
|
|
111
111
|
describe "and separation 1" do
|
112
112
|
before do
|
113
|
-
@c = Clusterer.new(@points, 1)
|
113
|
+
@c = Clusterer.new(@points, :separation => 1)
|
114
114
|
end
|
115
115
|
|
116
116
|
it "should return all four individual points" do
|
@@ -120,7 +120,7 @@ module Hierclust
|
|
120
120
|
|
121
121
|
describe "and separation 2" do
|
122
122
|
before do
|
123
|
-
@c = Clusterer.new(@points, 2)
|
123
|
+
@c = Clusterer.new(@points, :separation => 2)
|
124
124
|
end
|
125
125
|
|
126
126
|
it "should return two clusters" do
|
@@ -129,6 +129,35 @@ module Hierclust
|
|
129
129
|
end
|
130
130
|
end
|
131
131
|
|
132
|
+
describe "with missing values" do
|
133
|
+
before do
|
134
|
+
@points = [
|
135
|
+
Point.new(0, 1),
|
136
|
+
Point.new(1, 0),
|
137
|
+
Point.new(nil, 4),
|
138
|
+
Point.new(4, 3),
|
139
|
+
]
|
140
|
+
end
|
141
|
+
|
142
|
+
describe "and no stand-in" do
|
143
|
+
it "should raise and error" do
|
144
|
+
lambda {
|
145
|
+
Clusterer.new(@points)
|
146
|
+
}.should raise_error
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "with a stand-in" do
|
151
|
+
before do
|
152
|
+
@c = Clusterer.new(@points, :nils => 3, :separation => 1)
|
153
|
+
end
|
154
|
+
|
155
|
+
it "should cluster as though the missing data were the stand-in" do
|
156
|
+
@c.clusters.size.should == 4
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
132
161
|
describe "with eight points" do
|
133
162
|
before do
|
134
163
|
@points = [
|
@@ -155,7 +184,7 @@ module Hierclust
|
|
155
184
|
|
156
185
|
describe "and separation 1" do
|
157
186
|
before do
|
158
|
-
@clusters = Clusterer.new(@points, 1).clusters.sort
|
187
|
+
@clusters = Clusterer.new(@points, :separation => 1).clusters.sort
|
159
188
|
end
|
160
189
|
|
161
190
|
it "should have all eight points in individual clusters" do
|
@@ -166,7 +195,7 @@ module Hierclust
|
|
166
195
|
describe "and separation 3" do
|
167
196
|
describe "with no resolution limit" do
|
168
197
|
before do
|
169
|
-
@clusters = Clusterer.new(@points, 3).clusters.sort
|
198
|
+
@clusters = Clusterer.new(@points, :separation => 3).clusters.sort
|
170
199
|
end
|
171
200
|
|
172
201
|
it "should have three clusters" do
|
@@ -186,7 +215,7 @@ module Hierclust
|
|
186
215
|
|
187
216
|
describe "with coarse resolution" do
|
188
217
|
before do
|
189
|
-
@clusters = Clusterer.new(@points, 3, 5).clusters.sort
|
218
|
+
@clusters = Clusterer.new(@points, :separation => 3, :resolution => 5).clusters.sort
|
190
219
|
end
|
191
220
|
|
192
221
|
it "should have three clusters" do
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: Empact-hierclust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 2
|
9
|
-
- 1
|
10
|
-
version: 0.2.1
|
9
|
+
- 2
|
10
|
+
version: 0.2.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Brandt Kurowski
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-11-
|
19
|
+
date: 2010-11-02 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|