Empact-hierclust 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{Empact-hierclust}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brandt Kurowski", "Ben Woosley"]
12
- s.date = %q{2010-11-01}
12
+ s.date = %q{2010-11-02}
13
13
  s.description = %q{performs hierarchical clustering on points in Euclidian space}
14
14
  s.email = %q{ben.woosley@gmail.com}
15
15
  s.extra_rdoc_files = [
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.2.2 2010-11-01
2
+
3
+ * 2 minor enhancements
4
+ * Flatten incoming Point coordinates so that we can pass them as an array as well
5
+ * Make Clusterer resolution and separation arguments hash-based
6
+
1
7
  == 0.2.1 2010-11-01
2
8
 
3
9
  * 1 minor enhancement
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -13,10 +13,10 @@ module Hierclust
13
13
  end
14
14
 
15
15
  # Returns the average coordinates of all items in this Cluster.
16
- def coordinates
16
+ def coordinates(nils = nil)
17
17
  return nil if size == 0
18
18
  @coordinates ||= begin
19
- coords = self.points.map {|p| p.coordinates }
19
+ coords = self.points.map {|p| p.coordinates(nils) }
20
20
  coords = coords.shift.zip(*coords)
21
21
  coords.map {|points| points.inject(0.0) {|sum, p| sum + p } / points.size }
22
22
  end
@@ -16,18 +16,19 @@ module Hierclust
16
16
  # clustered, but will be put into clusters based strictly on coordinates.
17
17
  # The clusters generated by this "pre-clustering" will then be
18
18
  # hierarchically clustered as normal.
19
- def initialize(data, separation = nil, resolution = nil)
20
- @separation = separation
21
- @resolution = resolution
19
+ def initialize(data, options = {})
20
+ @separation = options.delete(:separation)
21
+ @resolution = options.delete(:resolution)
22
+ @nils = options.delete(:nils)
22
23
  @data = precluster(data)
23
- @distances = Distances.new(@data)
24
+ @distances = Distances.new(@data, @nils)
24
25
  end
25
26
 
26
27
  # Calculates and returns the set of clusters.
27
28
  def clusters
28
29
  return @data if @separation && @distances.separation > @separation
29
30
  while @data.length > 1
30
- @distances = Distances.new(@data)
31
+ @distances = Distances.new(@data, @nils)
31
32
  return @data if @separation && @distances.separation > @separation
32
33
  @data = find_cluster
33
34
  end
@@ -4,7 +4,7 @@ module Hierclust
4
4
  attr_reader :nearest, :outliers, :separation
5
5
 
6
6
  # Create a new Distances for the given +items+
7
- def initialize(items)
7
+ def initialize(items, nils = nil)
8
8
  @items = items
9
9
  @separation = 0
10
10
  @nearest = []
@@ -12,7 +12,7 @@ module Hierclust
12
12
  while !items.empty?
13
13
  origin = items.shift
14
14
  items.each do |other|
15
- distance = origin.distance_to(other)
15
+ distance = origin.distance_to(other, nils)
16
16
  if @separation == 0 or distance < @separation
17
17
  @separation = distance
18
18
  @nearest = [origin, other]
@@ -2,18 +2,25 @@ module Hierclust
2
2
  # A Point represents a single point in n-dimensional space.
3
3
  class Point
4
4
  # x-coordinate
5
- attr_accessor :coordinates
6
5
  attr_accessor :data
7
6
 
8
7
  # Create a new Point with the given coordinates.
9
8
  def initialize(*coordinates)
10
9
  @data = coordinates.last.is_a?(Hash) ? coordinates.pop : {}
11
- @coordinates = coordinates
10
+ @coordinates = coordinates.flatten
12
11
  end
13
12
 
13
+ def coordinates(nils = nil)
14
+ if nils
15
+ @coordinates.map {|c| c || nils }
16
+ else
17
+ @coordinates
18
+ end
19
+ end
20
+
14
21
  # Returns the distance from this Point to an +other+ Point.
15
- def distance_to(other)
16
- sum_of_squares = coordinates.zip(other.coordinates).map do |point, other_point|
22
+ def distance_to(other, nils = nil)
23
+ sum_of_squares = coordinates(nils).zip(other.coordinates(nils)).map do |point, other_point|
17
24
  (other_point - point) ** 2
18
25
  end.inject(0) {|sum, distance| sum + distance }
19
26
  Math.sqrt(sum_of_squares)
@@ -64,6 +64,70 @@ module Hierclust
64
64
  end
65
65
  end
66
66
 
67
+ describe Cluster, " with two points and data" do
68
+ before do
69
+ @x_1, @x_2 = 5, 15
70
+ @y_1, @y_2 = 4, 8
71
+ @p_1 = Point.new(@x_1, @y_1, :name => 'foo')
72
+ @p_2 = Point.new(@x_2, @y_2, :name => 'bar')
73
+ @c = Cluster.new([@p_1, @p_2])
74
+ @points = @c.points
75
+ end
76
+
77
+ it "should have coordinates at the average of points' coordinates" do
78
+ @c.coordinates.should == [10, 6]
79
+ end
80
+
81
+ it "should have two points" do
82
+ @points.size.should == 2
83
+ end
84
+
85
+ it "should include both points" do
86
+ @points.should include(@p_1, @p_2)
87
+ end
88
+
89
+ it "should retain the data on the points" do
90
+ @points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
91
+ end
92
+
93
+ it "should have correct radius" do
94
+ radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
95
+ @c.radius.should == radius
96
+ end
97
+ end
98
+
99
+ describe Cluster, " with an array of two points and data" do
100
+ before do
101
+ @x_1, @x_2 = 5, 15
102
+ @y_1, @y_2 = 4, 8
103
+ @p_1 = Point.new([@x_1, @y_1], :name => 'foo')
104
+ @p_2 = Point.new([@x_2, @y_2], :name => 'bar')
105
+ @c = Cluster.new([@p_1, @p_2])
106
+ @points = @c.points
107
+ end
108
+
109
+ it "should have coordinates at the average of points' coordinates" do
110
+ @c.coordinates.should == [10, 6]
111
+ end
112
+
113
+ it "should have two points" do
114
+ @points.size.should == 2
115
+ end
116
+
117
+ it "should include both points" do
118
+ @points.should include(@p_1, @p_2)
119
+ end
120
+
121
+ it "should retain the data on the points" do
122
+ @points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
123
+ end
124
+
125
+ it "should have correct radius" do
126
+ radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
127
+ @c.radius.should == radius
128
+ end
129
+ end
130
+
67
131
  describe Cluster, " with one point and one cluster" do
68
132
  before do
69
133
  @x_1, @x_2, @x_3 = 1, 2, 3
@@ -110,7 +110,7 @@ module Hierclust
110
110
 
111
111
  describe "and separation 1" do
112
112
  before do
113
- @c = Clusterer.new(@points, 1)
113
+ @c = Clusterer.new(@points, :separation => 1)
114
114
  end
115
115
 
116
116
  it "should return all four individual points" do
@@ -120,7 +120,7 @@ module Hierclust
120
120
 
121
121
  describe "and separation 2" do
122
122
  before do
123
- @c = Clusterer.new(@points, 2)
123
+ @c = Clusterer.new(@points, :separation => 2)
124
124
  end
125
125
 
126
126
  it "should return two clusters" do
@@ -129,6 +129,35 @@ module Hierclust
129
129
  end
130
130
  end
131
131
 
132
+ describe "with missing values" do
133
+ before do
134
+ @points = [
135
+ Point.new(0, 1),
136
+ Point.new(1, 0),
137
+ Point.new(nil, 4),
138
+ Point.new(4, 3),
139
+ ]
140
+ end
141
+
142
+ describe "and no stand-in" do
143
+ it "should raise and error" do
144
+ lambda {
145
+ Clusterer.new(@points)
146
+ }.should raise_error
147
+ end
148
+ end
149
+
150
+ describe "with a stand-in" do
151
+ before do
152
+ @c = Clusterer.new(@points, :nils => 3, :separation => 1)
153
+ end
154
+
155
+ it "should cluster as though the missing data were the stand-in" do
156
+ @c.clusters.size.should == 4
157
+ end
158
+ end
159
+ end
160
+
132
161
  describe "with eight points" do
133
162
  before do
134
163
  @points = [
@@ -155,7 +184,7 @@ module Hierclust
155
184
 
156
185
  describe "and separation 1" do
157
186
  before do
158
- @clusters = Clusterer.new(@points, 1).clusters.sort
187
+ @clusters = Clusterer.new(@points, :separation => 1).clusters.sort
159
188
  end
160
189
 
161
190
  it "should have all eight points in individual clusters" do
@@ -166,7 +195,7 @@ module Hierclust
166
195
  describe "and separation 3" do
167
196
  describe "with no resolution limit" do
168
197
  before do
169
- @clusters = Clusterer.new(@points, 3).clusters.sort
198
+ @clusters = Clusterer.new(@points, :separation => 3).clusters.sort
170
199
  end
171
200
 
172
201
  it "should have three clusters" do
@@ -186,7 +215,7 @@ module Hierclust
186
215
 
187
216
  describe "with coarse resolution" do
188
217
  before do
189
- @clusters = Clusterer.new(@points, 3, 5).clusters.sort
218
+ @clusters = Clusterer.new(@points, :separation => 3, :resolution => 5).clusters.sort
190
219
  end
191
220
 
192
221
  it "should have three clusters" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: Empact-hierclust
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brandt Kurowski
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-11-01 00:00:00 -07:00
19
+ date: 2010-11-02 00:00:00 -07:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency