hierclust 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.1.4 2008-02-13
2
+
3
+ * 1 minor enhancement:
4
+ * gave linear-time preclustering an independent "resolution" parameter
5
+ * 1 bugfix:
6
+ * corrected cluster coordinate calculation
7
+
1
8
  == 0.1.3 2008-02-10
2
9
 
3
10
  * 1 performance improvement
@@ -15,13 +15,15 @@ module Hierclust
15
15
  # Returns the average x-coordinate of all points in this Cluster.
16
16
  def x
17
17
  return nil if size == 0
18
- @x ||= @items.inject(0) {|sum, p| sum + p.x} / size
18
+ points = self.points
19
+ @x ||= points.inject(0.0) {|sum, p| sum + p.x} / points.size
19
20
  end
20
21
 
21
22
  # Returns the average y-coordinate of all points in this Cluster.
22
23
  def y
23
24
  return nil if size == 0
24
- @y ||= @items.inject(0) {|sum, p| sum + p.y} / size
25
+ points = self.points
26
+ @y ||= points.inject(0.0) {|sum, p| sum + p.y} / points.size
25
27
  end
26
28
 
27
29
  # Add an +item+ to this Cluster.
@@ -10,8 +10,15 @@ module Hierclust
10
10
  #
11
11
  # Specify +separation+ to stop the clustering process once all the
12
12
  # items are at least +separation+ units apart.
13
- def initialize(data, separation = nil)
13
+ #
14
+ # Specify +resolution+ to give a minimum size for clusters. Points that
15
+ # are within this distance from each other will not be hierarchically
16
+ # clustered, but will be put into clusters based strictly on coordinates.
17
+ # The clusters generated by this "pre-clustering" will then be
18
+ # hierarchically clustered as normal.
19
+ def initialize(data, separation = nil, resolution = nil)
14
20
  @separation = separation
21
+ @resolution = resolution
15
22
  @data = precluster(data)
16
23
  @distances = Distances.new(@data)
17
24
  end
@@ -45,6 +52,10 @@ module Hierclust
45
52
  end
46
53
 
47
54
  def precluster(points)
55
+ if @resolution.nil?
56
+ # preclustering is only applicable given lower bound on resolution
57
+ return points.dup
58
+ end
48
59
  if @separation.nil?
49
60
  # can't precluster w/ no min separation given
50
61
  return points.dup
@@ -53,7 +64,7 @@ module Hierclust
53
64
  # if no separation is asked for, it's all one cluster
54
65
  return [Cluster.new(points)]
55
66
  end
56
- grid_size = @separation / 2.0
67
+ grid_size = @resolution
57
68
  grid_clusters = Hash.new
58
69
  points.each do |point|
59
70
  grid_x = (point.x / grid_size).floor
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 3
5
+ TINY = 4
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -58,4 +58,32 @@ module Hierclust
58
58
  @points.should include(@p_1, @p_2)
59
59
  end
60
60
  end
61
+
62
+ describe Cluster, " with one point and one cluster" do
63
+ before do
64
+ @x_1, @x_2, @x_3 = 1, 2, 3
65
+ @y_1, @y_2, @y_3 = 2, 2, 5
66
+ @p_1 = Point.new(@x_1, @y_1)
67
+ @p_2 = Point.new(@x_2, @y_2)
68
+ @p_3 = Point.new(@x_3, @y_3)
69
+ @c_1 = Cluster.new([@p_1, @p_2])
70
+ @c_2 = Cluster.new([@p_3, @c_1])
71
+ end
72
+
73
+ it "should have two items" do
74
+ @c_2.items.size.should == 2
75
+ end
76
+
77
+ it "should have three points" do
78
+ @c_2.points.size.should == 3
79
+ end
80
+
81
+ it "should have x-coordinate at average of point's x-coordinates" do
82
+ @c_2.x.should == 2
83
+ end
84
+
85
+ it "should have y-coordinate at average of point's y-coordinates" do
86
+ @c_2.y.should == 3
87
+ end
88
+ end
61
89
  end
@@ -164,18 +164,43 @@ module Hierclust
164
164
  end
165
165
 
166
166
  describe "and separation 3" do
167
- before do
168
- @clusters = Clusterer.new(@points, 3).clusters.sort
167
+ describe "with no resolution limit" do
168
+ before do
169
+ @clusters = Clusterer.new(@points, 3).clusters.sort
170
+ end
171
+
172
+ it "should have three clusters" do
173
+ @clusters.size.should == 3
174
+ end
175
+
176
+ it "should have clusters size 2, 2, and 4 " do
177
+ @clusters[0].points.size.should == 2
178
+ @clusters[1].points.size.should == 2
179
+ @clusters[2].points.size.should == 4
180
+ end
181
+
182
+ it "should have 2 items in large cluster" do
183
+ @clusters[2].items.size.should == 2
184
+ end
169
185
  end
170
186
 
171
- it "should have three clusters" do
172
- @clusters.size.should == 3
173
- end
187
+ describe "with coarse resolution" do
188
+ before do
189
+ @clusters = Clusterer.new(@points, 3, 5).clusters.sort
190
+ end
191
+
192
+ it "should have three clusters" do
193
+ @clusters.size.should == 2
194
+ end
195
+
196
+ it "should have clusters size 2, 2, and 4 " do
197
+ @clusters[0].points.size.should == 4
198
+ @clusters[1].points.size.should == 4
199
+ end
174
200
 
175
- it "should have clusters size 2, 2, and 4 " do
176
- @clusters[0].points.size.should == 2
177
- @clusters[1].points.size.should == 2
178
- @clusters[2].points.size.should == 4
201
+ it "should have 4 items in large cluster" do
202
+ @clusters[1].items.size.should == 4
203
+ end
179
204
  end
180
205
  end
181
206
  end
@@ -4,10 +4,15 @@ module Hierclust
4
4
  describe Point do
5
5
  before do
6
6
  @p = Point.new(1, 1)
7
+ @p2 = Point.new(2, 1)
7
8
  end
8
9
 
9
10
  it "should have points that is an array of only self" do
10
11
  @p.points.should == [@p]
11
12
  end
13
+
14
+ it "should calculate distance to other" do
15
+ @p.distance_to(@p2).should == 1.0
16
+ end
12
17
  end
13
18
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.3</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.4</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-10 00:00:00 -05:00
12
+ date: 2008-02-13 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15