hierclust 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.1.2 2008-02-07
2
+
3
+ * 1 performance improvement:
4
+ * refactored Distances to be more intelligent about precalculated values
5
+ * 1 bugfix:
6
+ * correct degenerate case of Hierclust::Point.points
7
+
1
8
  == 0.1.1 2008-02-04
2
9
 
3
10
  * 1 minor enhancement:
@@ -1,56 +1,25 @@
1
1
  module Hierclust
2
2
  # Represents the pair-wise distances between a set of items.
3
3
  class Distances
4
+ attr_reader :nearest, :outliers, :separation
5
+
4
6
  # Create a new Distances for the given +items+
5
7
  def initialize(items)
6
8
  @items = items
7
- @distances = {}
8
- end
9
-
10
- # Returns the distance between items +a+ and +b+.
11
- def [](a, b)
12
- key = (a.object_id < b.object_id) ?
13
- "#{a.object_id},#{b.object_id}" :
14
- "#{b.object_id},#{a.object_id}"
15
- @distances[key] ||= Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)
16
- end
17
-
18
- # Returns the pair of items that are nearest to each other.
19
- def nearest
20
- @nearest ||= begin
21
- key = nearest_pair[0]
22
- a_id, b_id = key.split ','
23
- a = @items.select {|item| item.object_id.to_s == a_id}.first
24
- b = @items.select {|item| item.object_id.to_s == b_id}.first
25
- [a, b]
26
- end
27
- end
28
-
29
- # Returns all items except the pair that are nearest to each other.
30
- def outliers
31
- @outliers ||= begin
32
- @items - nearest
33
- end
34
- end
35
-
36
- # Returns the smallest distance between any pair of items.
37
- def separation
38
- @separation ||= @items.size < 2 ? 0 : nearest_pair[1]
39
- end
40
-
41
- private
42
-
43
- def nearest_pair
44
- prepopulate_cache
45
- @distances.to_a.sort{|a, b| a[1] <=> b[1]}.first
46
- end
47
-
48
- def prepopulate_cache
9
+ @separation = 0
10
+ @nearest = []
49
11
  items = @items.dup
50
12
  while !items.empty?
51
13
  origin = items.shift
52
- items.each {|item| self[origin, item]}
14
+ items.each do |other|
15
+ distance = origin.distance_to(other)
16
+ if @separation == 0 or distance < @separation
17
+ @separation = distance
18
+ @nearest = [origin, other]
19
+ end
20
+ end
53
21
  end
22
+ @outliers = @items - @nearest
54
23
  end
55
24
  end
56
25
  end
@@ -12,7 +12,12 @@ module Hierclust
12
12
  @x = x
13
13
  @y = y
14
14
  end
15
-
15
+
16
+ # Returns the distance from this Point to an +other+ Point.
17
+ def distance_to(other)
18
+ Math.sqrt((other.x - self.x) ** 2 + (other.y - self.y) ** 2)
19
+ end
20
+
16
21
  # Simplifies code by letting us treat Clusters and Points interchangeably
17
22
  def size #:nodoc:
18
23
  1
@@ -20,7 +25,7 @@ module Hierclust
20
25
 
21
26
  # Simplifies code by letting us treat Clusters and Points interchangeably
22
27
  def points #:nodoc:
23
- self
28
+ [self]
24
29
  end
25
30
 
26
31
  # Returns a legible representation of this Point.
@@ -29,8 +34,17 @@ module Hierclust
29
34
  end
30
35
 
31
36
  # Sorts points relative to each other on the x-axis.
37
+ #
38
+ # Uses y-axis as a tie-breaker, so that the sort order is deterministic
39
+ # even if multiple points have the same x-coordinate.
40
+ #
41
+ # Uses object_id as a final tie-breaker, so the sort order is guaranteed
42
+ # to be deterministic even when multiple points have the same coordinates.
32
43
  def <=>(other)
33
- return self.x <=> other.x
44
+ cmp = self.x <=> other.x
45
+ cmp = self.y <=> other.y if cmp == 0
46
+ cmp = self.object_id <=> other.object_id if cmp == 0
47
+ return cmp
34
48
  end
35
49
  end
36
50
  end
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -11,14 +11,6 @@ module Hierclust
11
11
  @dist = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2)
12
12
  end
13
13
 
14
- it "should tell us the distance between those points" do
15
- @d[@p_1, @p_2].should == @dist
16
- end
17
-
18
- it "should tell us the distance between those points backwards" do
19
- @d[@p_1, @p_2].should == @dist
20
- end
21
-
22
14
  it "should have separation equal to distance between those points" do
23
15
  @d.separation.should == @dist
24
16
  end
@@ -6,7 +6,8 @@ module Hierclust
6
6
  @p = Point.new(1, 1)
7
7
  end
8
8
 
9
- it "should return no clusters" do
9
+ it "should have points that is an array of only self" do
10
+ @p.points.should == [@p]
10
11
  end
11
12
  end
12
13
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.1</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.2</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
@@ -105,7 +105,7 @@ to your data:</p>
105
105
  <span class="ident">map</span><span class="punct">.</span><span class="ident">add_point</span><span class="punct">(</span>
106
106
  <span class="ident">x</span> <span class="punct">=&gt;</span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">x</span><span class="punct">,</span>
107
107
  <span class="ident">y</span> <span class="punct">=&gt;</span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">y</span><span class="punct">,</span>
108
- <span class="ident">label</span> <span class="punct">=&gt;</span> <span class="punct">&quot;</span><span class="string"><span class="expr">#{cluster.size}</span> Things</span><span class="punct">&quot;</span>
108
+ <span class="ident">label</span> <span class="punct">=&gt;</span> <span class="punct">&quot;</span><span class="string"><span class="expr">#{cluster.points}</span> Things</span><span class="punct">&quot;</span>
109
109
  <span class="punct">)</span>
110
110
  <span class="keyword">end</span></pre></p>
111
111
 
@@ -125,10 +125,7 @@ to your data:</p>
125
125
  <h2>Source code</h2>
126
126
 
127
127
 
128
- <p>The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.</p>
129
-
130
-
131
- <p>You can also browse the source online at <a href="http://hierclust.rubyforge.org/svn/trunk/">http://hierclust.rubyforge.org/svn/trunk/</a></p>
128
+ <p>You can browse the source at <a href="http://hierclust.rubyforge.org/svn/trunk/">http://hierclust.rubyforge.org/svn/trunk/</a></p>
132
129
 
133
130
 
134
131
  <h2>How to submit patches</h2>
@@ -148,7 +145,7 @@ to your data:</p>
148
145
 
149
146
  <p>Comments are welcome. Send an email to <a href="mailto:brandt@kurowski.net">Brandt Kurowski</a> via the <a href="http://groups.google.com/group/hierclust">forum</a></p>
150
147
  <p class="coda">
151
- <a href="http://brandt.kurowski.net/">Brandt Kurowski</a>, 4th February 2008<br>
148
+ <a href="http://brandt.kurowski.net/">Brandt Kurowski</a>, 6th February 2008<br>
152
149
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
153
150
  </p>
154
151
  </div>
data/website/index.txt CHANGED
@@ -52,7 +52,7 @@ clusters.each do |cluster|
52
52
  map.add_point(
53
53
  x => cluster.x,
54
54
  y => cluster.y,
55
- label => "#{cluster.size} Things"
55
+ label => "#{cluster.points} Things"
56
56
  )
57
57
  end</pre>
58
58
 
@@ -66,9 +66,7 @@ h2. Forum
66
66
 
67
67
  h2. Source code
68
68
 
69
- The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.
70
-
71
- You can also browse the source online at "http://hierclust.rubyforge.org/svn/trunk/":http://hierclust.rubyforge.org/svn/trunk/
69
+ You can browse the source at "http://hierclust.rubyforge.org/svn/trunk/":http://hierclust.rubyforge.org/svn/trunk/
72
70
 
73
71
  h2. How to submit patches
74
72
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-04 00:00:00 -05:00
12
+ date: 2008-02-08 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15