hierclust 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.1.4 2008-02-13
2
+
3
+ * 1 minor enhancement:
4
+ * gave linear-time preclustering an independent "resolution" parameter
5
+ * 1 bugfix:
6
+ * corrected cluster coordinate calculation
7
+
1
8
  == 0.1.3 2008-02-10
2
9
 
3
10
  * 1 performance improvement
@@ -15,13 +15,15 @@ module Hierclust
15
15
  # Returns the average x-coordinate of all points in this Cluster.
16
16
  def x
17
17
  return nil if size == 0
18
- @x ||= @items.inject(0) {|sum, p| sum + p.x} / size
18
+ points = self.points
19
+ @x ||= points.inject(0.0) {|sum, p| sum + p.x} / points.size
19
20
  end
20
21
 
21
22
  # Returns the average y-coordinate of all points in this Cluster.
22
23
  def y
23
24
  return nil if size == 0
24
- @y ||= @items.inject(0) {|sum, p| sum + p.y} / size
25
+ points = self.points
26
+ @y ||= points.inject(0.0) {|sum, p| sum + p.y} / points.size
25
27
  end
26
28
 
27
29
  # Add an +item+ to this Cluster.
@@ -10,8 +10,15 @@ module Hierclust
10
10
  #
11
11
  # Specify +separation+ to stop the clustering process once all the
12
12
  # items are at least +separation+ units apart.
13
- def initialize(data, separation = nil)
13
+ #
14
+ # Specify +resolution+ to give a minimum size for clusters. Points that
15
+ # are within this distance from each other will not be hierarchically
16
+ # clustered, but will be put into clusters based strictly on coordinates.
17
+ # The clusters generated by this "pre-clustering" will then be
18
+ # hierarchically clustered as normal.
19
+ def initialize(data, separation = nil, resolution = nil)
14
20
  @separation = separation
21
+ @resolution = resolution
15
22
  @data = precluster(data)
16
23
  @distances = Distances.new(@data)
17
24
  end
@@ -45,6 +52,10 @@ module Hierclust
45
52
  end
46
53
 
47
54
  def precluster(points)
55
+ if @resolution.nil?
56
+ # preclustering only applies when a resolution (minimum cluster size) is given
57
+ return points.dup
58
+ end
48
59
  if @separation.nil?
49
60
  # can't precluster w/ no min separation given
50
61
  return points.dup
@@ -53,7 +64,7 @@ module Hierclust
53
64
  # if no separation is asked for, it's all one cluster
54
65
  return [Cluster.new(points)]
55
66
  end
56
- grid_size = @separation / 2.0
67
+ grid_size = @resolution
57
68
  grid_clusters = Hash.new
58
69
  points.each do |point|
59
70
  grid_x = (point.x / grid_size).floor
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 3
5
+ TINY = 4
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -58,4 +58,32 @@ module Hierclust
58
58
  @points.should include(@p_1, @p_2)
59
59
  end
60
60
  end
61
+
62
+ describe Cluster, " with one point and one cluster" do
63
+ before do
64
+ @x_1, @x_2, @x_3 = 1, 2, 3
65
+ @y_1, @y_2, @y_3 = 2, 2, 5
66
+ @p_1 = Point.new(@x_1, @y_1)
67
+ @p_2 = Point.new(@x_2, @y_2)
68
+ @p_3 = Point.new(@x_3, @y_3)
69
+ @c_1 = Cluster.new([@p_1, @p_2])
70
+ @c_2 = Cluster.new([@p_3, @c_1])
71
+ end
72
+
73
+ it "should have two items" do
74
+ @c_2.items.size.should == 2
75
+ end
76
+
77
+ it "should have three points" do
78
+ @c_2.points.size.should == 3
79
+ end
80
+
81
+ it "should have x-coordinate at average of points' x-coordinates" do
82
+ @c_2.x.should == 2
83
+ end
84
+
85
+ it "should have y-coordinate at average of points' y-coordinates" do
86
+ @c_2.y.should == 3
87
+ end
88
+ end
61
89
  end
@@ -164,18 +164,43 @@ module Hierclust
164
164
  end
165
165
 
166
166
  describe "and separation 3" do
167
- before do
168
- @clusters = Clusterer.new(@points, 3).clusters.sort
167
+ describe "with no resolution limit" do
168
+ before do
169
+ @clusters = Clusterer.new(@points, 3).clusters.sort
170
+ end
171
+
172
+ it "should have three clusters" do
173
+ @clusters.size.should == 3
174
+ end
175
+
176
+ it "should have clusters size 2, 2, and 4 " do
177
+ @clusters[0].points.size.should == 2
178
+ @clusters[1].points.size.should == 2
179
+ @clusters[2].points.size.should == 4
180
+ end
181
+
182
+ it "should have 2 items in large cluster" do
183
+ @clusters[2].items.size.should == 2
184
+ end
169
185
  end
170
186
 
171
- it "should have three clusters" do
172
- @clusters.size.should == 3
173
- end
187
+ describe "with coarse resolution" do
188
+ before do
189
+ @clusters = Clusterer.new(@points, 3, 5).clusters.sort
190
+ end
191
+
192
+ it "should have two clusters" do
193
+ @clusters.size.should == 2
194
+ end
195
+
196
+ it "should have clusters size 4 and 4" do
197
+ @clusters[0].points.size.should == 4
198
+ @clusters[1].points.size.should == 4
199
+ end
174
200
 
175
- it "should have clusters size 2, 2, and 4 " do
176
- @clusters[0].points.size.should == 2
177
- @clusters[1].points.size.should == 2
178
- @clusters[2].points.size.should == 4
201
+ it "should have 4 items in large cluster" do
202
+ @clusters[1].items.size.should == 4
203
+ end
179
204
  end
180
205
  end
181
206
  end
@@ -4,10 +4,15 @@ module Hierclust
4
4
  describe Point do
5
5
  before do
6
6
  @p = Point.new(1, 1)
7
+ @p2 = Point.new(2, 1)
7
8
  end
8
9
 
9
10
  it "should have points that is an array of only self" do
10
11
  @p.points.should == [@p]
11
12
  end
13
+
14
+ it "should calculate distance to other" do
15
+ @p.distance_to(@p2).should == 1.0
16
+ end
12
17
  end
13
18
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.3</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.4</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-10 00:00:00 -05:00
12
+ date: 2008-02-13 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15