Empact-hierclust 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{Empact-hierclust}
8
- s.version = "0.2.1"
8
+ s.version = "0.2.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Brandt Kurowski", "Ben Woosley"]
12
- s.date = %q{2010-11-01}
12
+ s.date = %q{2010-11-02}
13
13
  s.description = %q{performs hierarchical clustering on points in Euclidian space}
14
14
  s.email = %q{ben.woosley@gmail.com}
15
15
  s.extra_rdoc_files = [
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ == 0.2.2 2010-11-01
2
+
3
+ * 2 minor enhancements
4
+ * Flatten incoming Point coordinates so that we can pass them as an array as well
5
+ * Make Clusterer resolution and separation arguments hash-based
6
+
1
7
  == 0.2.1 2010-11-01
2
8
 
3
9
  * 1 minor enhancement
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.1
1
+ 0.2.2
@@ -13,10 +13,10 @@ module Hierclust
13
13
  end
14
14
 
15
15
  # Returns the average coordinates of all items in this Cluster.
16
- def coordinates
16
+ def coordinates(nils = nil)
17
17
  return nil if size == 0
18
18
  @coordinates ||= begin
19
- coords = self.points.map {|p| p.coordinates }
19
+ coords = self.points.map {|p| p.coordinates(nils) }
20
20
  coords = coords.shift.zip(*coords)
21
21
  coords.map {|points| points.inject(0.0) {|sum, p| sum + p } / points.size }
22
22
  end
@@ -16,18 +16,19 @@ module Hierclust
16
16
  # clustered, but will be put into clusters based strictly on coordinates.
17
17
  # The clusters generated by this "pre-clustering" will then be
18
18
  # hierarchically clustered as normal.
19
- def initialize(data, separation = nil, resolution = nil)
20
- @separation = separation
21
- @resolution = resolution
19
+ def initialize(data, options = {})
20
+ @separation = options.delete(:separation)
21
+ @resolution = options.delete(:resolution)
22
+ @nils = options.delete(:nils)
22
23
  @data = precluster(data)
23
- @distances = Distances.new(@data)
24
+ @distances = Distances.new(@data, @nils)
24
25
  end
25
26
 
26
27
  # Calculates and returns the set of clusters.
27
28
  def clusters
28
29
  return @data if @separation && @distances.separation > @separation
29
30
  while @data.length > 1
30
- @distances = Distances.new(@data)
31
+ @distances = Distances.new(@data, @nils)
31
32
  return @data if @separation && @distances.separation > @separation
32
33
  @data = find_cluster
33
34
  end
@@ -4,7 +4,7 @@ module Hierclust
4
4
  attr_reader :nearest, :outliers, :separation
5
5
 
6
6
  # Create a new Distances for the given +items+
7
- def initialize(items)
7
+ def initialize(items, nils = nil)
8
8
  @items = items
9
9
  @separation = 0
10
10
  @nearest = []
@@ -12,7 +12,7 @@ module Hierclust
12
12
  while !items.empty?
13
13
  origin = items.shift
14
14
  items.each do |other|
15
- distance = origin.distance_to(other)
15
+ distance = origin.distance_to(other, nils)
16
16
  if @separation == 0 or distance < @separation
17
17
  @separation = distance
18
18
  @nearest = [origin, other]
@@ -2,18 +2,25 @@ module Hierclust
2
2
  # A Point represents a single point in n-dimensional space.
3
3
  class Point
4
4
  # x-coordinate
5
- attr_accessor :coordinates
6
5
  attr_accessor :data
7
6
 
8
7
  # Create a new Point with the given coordinates.
9
8
  def initialize(*coordinates)
10
9
  @data = coordinates.last.is_a?(Hash) ? coordinates.pop : {}
11
- @coordinates = coordinates
10
+ @coordinates = coordinates.flatten
12
11
  end
13
12
 
13
+ def coordinates(nils = nil)
14
+ if nils
15
+ @coordinates.map {|c| c || nils }
16
+ else
17
+ @coordinates
18
+ end
19
+ end
20
+
14
21
  # Returns the distance from this Point to an +other+ Point.
15
- def distance_to(other)
16
- sum_of_squares = coordinates.zip(other.coordinates).map do |point, other_point|
22
+ def distance_to(other, nils = nil)
23
+ sum_of_squares = coordinates(nils).zip(other.coordinates(nils)).map do |point, other_point|
17
24
  (other_point - point) ** 2
18
25
  end.inject(0) {|sum, distance| sum + distance }
19
26
  Math.sqrt(sum_of_squares)
@@ -64,6 +64,70 @@ module Hierclust
64
64
  end
65
65
  end
66
66
 
67
+ describe Cluster, " with two points and data" do
68
+ before do
69
+ @x_1, @x_2 = 5, 15
70
+ @y_1, @y_2 = 4, 8
71
+ @p_1 = Point.new(@x_1, @y_1, :name => 'foo')
72
+ @p_2 = Point.new(@x_2, @y_2, :name => 'bar')
73
+ @c = Cluster.new([@p_1, @p_2])
74
+ @points = @c.points
75
+ end
76
+
77
+ it "should have coordinates at the average of points' coordinates" do
78
+ @c.coordinates.should == [10, 6]
79
+ end
80
+
81
+ it "should have two points" do
82
+ @points.size.should == 2
83
+ end
84
+
85
+ it "should include both points" do
86
+ @points.should include(@p_1, @p_2)
87
+ end
88
+
89
+ it "should retain the data on the points" do
90
+ @points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
91
+ end
92
+
93
+ it "should have correct radius" do
94
+ radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
95
+ @c.radius.should == radius
96
+ end
97
+ end
98
+
99
+ describe Cluster, " with an array of two points and data" do
100
+ before do
101
+ @x_1, @x_2 = 5, 15
102
+ @y_1, @y_2 = 4, 8
103
+ @p_1 = Point.new([@x_1, @y_1], :name => 'foo')
104
+ @p_2 = Point.new([@x_2, @y_2], :name => 'bar')
105
+ @c = Cluster.new([@p_1, @p_2])
106
+ @points = @c.points
107
+ end
108
+
109
+ it "should have coordinates at the average of points' coordinates" do
110
+ @c.coordinates.should == [10, 6]
111
+ end
112
+
113
+ it "should have two points" do
114
+ @points.size.should == 2
115
+ end
116
+
117
+ it "should include both points" do
118
+ @points.should include(@p_1, @p_2)
119
+ end
120
+
121
+ it "should retain the data on the points" do
122
+ @points.map(&:data).should =~ [{:name => 'foo'}, {:name => 'bar'}]
123
+ end
124
+
125
+ it "should have correct radius" do
126
+ radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
127
+ @c.radius.should == radius
128
+ end
129
+ end
130
+
67
131
  describe Cluster, " with one point and one cluster" do
68
132
  before do
69
133
  @x_1, @x_2, @x_3 = 1, 2, 3
@@ -110,7 +110,7 @@ module Hierclust
110
110
 
111
111
  describe "and separation 1" do
112
112
  before do
113
- @c = Clusterer.new(@points, 1)
113
+ @c = Clusterer.new(@points, :separation => 1)
114
114
  end
115
115
 
116
116
  it "should return all four individual points" do
@@ -120,7 +120,7 @@ module Hierclust
120
120
 
121
121
  describe "and separation 2" do
122
122
  before do
123
- @c = Clusterer.new(@points, 2)
123
+ @c = Clusterer.new(@points, :separation => 2)
124
124
  end
125
125
 
126
126
  it "should return two clusters" do
@@ -129,6 +129,35 @@ module Hierclust
129
129
  end
130
130
  end
131
131
 
132
+ describe "with missing values" do
133
+ before do
134
+ @points = [
135
+ Point.new(0, 1),
136
+ Point.new(1, 0),
137
+ Point.new(nil, 4),
138
+ Point.new(4, 3),
139
+ ]
140
+ end
141
+
142
+ describe "and no stand-in" do
143
+ it "should raise and error" do
144
+ lambda {
145
+ Clusterer.new(@points)
146
+ }.should raise_error
147
+ end
148
+ end
149
+
150
+ describe "with a stand-in" do
151
+ before do
152
+ @c = Clusterer.new(@points, :nils => 3, :separation => 1)
153
+ end
154
+
155
+ it "should cluster as though the missing data were the stand-in" do
156
+ @c.clusters.size.should == 4
157
+ end
158
+ end
159
+ end
160
+
132
161
  describe "with eight points" do
133
162
  before do
134
163
  @points = [
@@ -155,7 +184,7 @@ module Hierclust
155
184
 
156
185
  describe "and separation 1" do
157
186
  before do
158
- @clusters = Clusterer.new(@points, 1).clusters.sort
187
+ @clusters = Clusterer.new(@points, :separation => 1).clusters.sort
159
188
  end
160
189
 
161
190
  it "should have all eight points in individual clusters" do
@@ -166,7 +195,7 @@ module Hierclust
166
195
  describe "and separation 3" do
167
196
  describe "with no resolution limit" do
168
197
  before do
169
- @clusters = Clusterer.new(@points, 3).clusters.sort
198
+ @clusters = Clusterer.new(@points, :separation => 3).clusters.sort
170
199
  end
171
200
 
172
201
  it "should have three clusters" do
@@ -186,7 +215,7 @@ module Hierclust
186
215
 
187
216
  describe "with coarse resolution" do
188
217
  before do
189
- @clusters = Clusterer.new(@points, 3, 5).clusters.sort
218
+ @clusters = Clusterer.new(@points, :separation => 3, :resolution => 5).clusters.sort
190
219
  end
191
220
 
192
221
  it "should have three clusters" do
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: Empact-hierclust
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 1
10
- version: 0.2.1
9
+ - 2
10
+ version: 0.2.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Brandt Kurowski
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-11-01 00:00:00 -07:00
19
+ date: 2010-11-02 00:00:00 -07:00
20
20
  default_executable:
21
21
  dependencies:
22
22
  - !ruby/object:Gem::Dependency