hierclust 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.1.2 2008-02-07
2
+
3
+ * 1 performance improvement
4
+ * refactored Distances to be more intelligent about precalculated values
5
+ * 1 bugfix:
6
+ * correct degenerate case of Hierclust::Point.points
7
+
1
8
  == 0.1.1 2008-02-04
2
9
 
3
10
  * 1 minor enhancement:
@@ -1,56 +1,25 @@
1
1
  module Hierclust
2
2
  # Represents the pair-wise distances between a set of items.
3
3
  class Distances
4
+ attr_reader :nearest, :outliers, :separation
5
+
4
6
  # Create a new Distances for the given +items+
5
7
  def initialize(items)
6
8
  @items = items
7
- @distances = {}
8
- end
9
-
10
- # Returns the distance between items +a+ and +b+.
11
- def [](a, b)
12
- key = (a.object_id < b.object_id) ?
13
- "#{a.object_id},#{b.object_id}" :
14
- "#{b.object_id},#{a.object_id}"
15
- @distances[key] ||= Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)
16
- end
17
-
18
- # Returns the pair of items that are nearest to each other.
19
- def nearest
20
- @nearest ||= begin
21
- key = nearest_pair[0]
22
- a_id, b_id = key.split ','
23
- a = @items.select {|item| item.object_id.to_s == a_id}.first
24
- b = @items.select {|item| item.object_id.to_s == b_id}.first
25
- [a, b]
26
- end
27
- end
28
-
29
- # Returns all items except the pair that are nearest to each other.
30
- def outliers
31
- @outliers ||= begin
32
- @items - nearest
33
- end
34
- end
35
-
36
- # Returns the smallest distance between any pair of items.
37
- def separation
38
- @separation ||= @items.size < 2 ? 0 : nearest_pair[1]
39
- end
40
-
41
- private
42
-
43
- def nearest_pair
44
- prepopulate_cache
45
- @distances.to_a.sort{|a, b| a[1] <=> b[1]}.first
46
- end
47
-
48
- def prepopulate_cache
9
+ @separation = 0
10
+ @nearest = []
49
11
  items = @items.dup
50
12
  while !items.empty?
51
13
  origin = items.shift
52
- items.each {|item| self[origin, item]}
14
+ items.each do |other|
15
+ distance = origin.distance_to(other)
16
+ if @separation == 0 or distance < @separation
17
+ @separation = distance
18
+ @nearest = [origin, other]
19
+ end
20
+ end
53
21
  end
22
+ @outliers = @items - @nearest
54
23
  end
55
24
  end
56
25
  end
@@ -12,7 +12,12 @@ module Hierclust
12
12
  @x = x
13
13
  @y = y
14
14
  end
15
-
15
+
16
+ # Returns the distance from this Point to an +other+ Point.
17
+ def distance_to(other)
18
+ Math.sqrt((other.x - self.x) ** 2 + (other.y - self.y) ** 2)
19
+ end
20
+
16
21
  # Simplifies code by letting us treat Clusters and Points interchangeably
17
22
  def size #:nodoc:
18
23
  1
@@ -20,7 +25,7 @@ module Hierclust
20
25
 
21
26
  # Simplifies code by letting us treat Clusters and Points interchangeably
22
27
  def points #:nodoc:
23
- self
28
+ [self]
24
29
  end
25
30
 
26
31
  # Returns a legible representation of this Point.
@@ -29,8 +34,17 @@ module Hierclust
29
34
  end
30
35
 
31
36
  # Sorts points relative to each other on the x-axis.
37
+ #
38
+ # Uses y-axis as a tie-breaker, so that sorting is stable even if
39
+ # multiple points have the same x-coordinate.
40
+ #
41
+ # Uses object_id as a final tie-breaker, so sorts are guaranteed to
42
+ # be stable even when multiple points have the same coordinates.
32
43
  def <=>(other)
33
- return self.x <=> other.x
44
+ cmp = self.x <=> other.x
45
+ cmp = self.y <=> other.y if cmp == 0
46
+ cmp = self.object_id <=> other.object_id if cmp == 0
47
+ return cmp
34
48
  end
35
49
  end
36
50
  end
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -11,14 +11,6 @@ module Hierclust
11
11
  @dist = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2)
12
12
  end
13
13
 
14
- it "should tell us the distance between those points" do
15
- @d[@p_1, @p_2].should == @dist
16
- end
17
-
18
- it "should tell us the distance between those points backwards" do
19
- @d[@p_1, @p_2].should == @dist
20
- end
21
-
22
14
  it "should have separation equal to distance between those points" do
23
15
  @d.separation.should == @dist
24
16
  end
@@ -6,7 +6,8 @@ module Hierclust
6
6
  @p = Point.new(1, 1)
7
7
  end
8
8
 
9
- it "should return no clusters" do
9
+ it "should have points that is an array of only self" do
10
+ @p.points.should == [@p]
10
11
  end
11
12
  end
12
13
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.1</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.2</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
@@ -105,7 +105,7 @@ to your data:</p>
105
105
  <span class="ident">map</span><span class="punct">.</span><span class="ident">add_point</span><span class="punct">(</span>
106
106
  <span class="ident">x</span> <span class="punct">=&gt;</span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">x</span><span class="punct">,</span>
107
107
  <span class="ident">y</span> <span class="punct">=&gt;</span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">y</span><span class="punct">,</span>
108
- <span class="ident">label</span> <span class="punct">=&gt;</span> <span class="punct">&quot;</span><span class="string"><span class="expr">#{cluster.size}</span> Things</span><span class="punct">&quot;</span>
108
+ <span class="ident">label</span> <span class="punct">=&gt;</span> <span class="punct">&quot;</span><span class="string"><span class="expr">#{cluster.points}</span> Things</span><span class="punct">&quot;</span>
109
109
  <span class="punct">)</span>
110
110
  <span class="keyword">end</span></pre></p>
111
111
 
@@ -125,10 +125,7 @@ to your data:</p>
125
125
  <h2>Source code</h2>
126
126
 
127
127
 
128
- <p>The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.</p>
129
-
130
-
131
- <p>You can also browse the source online at <a href="http://hierclust.rubyforge.org/svn/trunk/">http://hierclust.rubyforge.org/svn/trunk/</a></p>
128
+ <p>You can browse the source at <a href="http://hierclust.rubyforge.org/svn/trunk/">http://hierclust.rubyforge.org/svn/trunk/</a></p>
132
129
 
133
130
 
134
131
  <h2>How to submit patches</h2>
@@ -148,7 +145,7 @@ to your data:</p>
148
145
 
149
146
  <p>Comments are welcome. Send an email to <a href="mailto:brandt@kurowski.net">Brandt Kurowski</a> or post to the <a href="http://groups.google.com/group/hierclust">forum</a>.</p>
150
147
  <p class="coda">
151
- <a href="http://brandt.kurowski.net/">Brandt Kurowski</a>, 4th February 2008<br>
148
+ <a href="http://brandt.kurowski.net/">Brandt Kurowski</a>, 6th February 2008<br>
152
149
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
153
150
  </p>
154
151
  </div>
data/website/index.txt CHANGED
@@ -52,7 +52,7 @@ clusters.each do |cluster|
52
52
  map.add_point(
53
53
  x => cluster.x,
54
54
  y => cluster.y,
55
- label => "#{cluster.size} Things"
55
+ label => "#{cluster.points} Things"
56
56
  )
57
57
  end</pre>
58
58
 
@@ -66,9 +66,7 @@ h2. Forum
66
66
 
67
67
  h2. Source code
68
68
 
69
- The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.
70
-
71
- You can also browse the source online at "http://hierclust.rubyforge.org/svn/trunk/":http://hierclust.rubyforge.org/svn/trunk/
69
+ You can browse the source at "http://hierclust.rubyforge.org/svn/trunk/":http://hierclust.rubyforge.org/svn/trunk/
72
70
 
73
71
  h2. How to submit patches
74
72
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-04 00:00:00 -05:00
12
+ date: 2008-02-08 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15