hierclust 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.1.3 2008-02-10
2
+
3
+ * 1 performance improvement
4
+ * added linear-time preclustering based on minimum separation distance
5
+ * 1 major change:
6
+ * when minimum separation is given, the clusterer will no longer calculate
7
+ and return clusters smaller than "separation / 2.0"
8
+
1
9
  == 0.1.2 2008-02-07
2
10
 
3
11
  * 1 performance improvement
@@ -24,6 +24,12 @@ module Hierclust
24
24
  @y ||= @items.inject(0) {|sum, p| sum + p.y} / size
25
25
  end
26
26
 
27
+ # Add an +item+ to this Cluster.
28
+ def <<(item)
29
+ @x, @y = nil, nil # flush cached pseudo-attributes
30
+ @items << item
31
+ end
32
+
27
33
  # Returns the number of items in this Cluster.
28
34
  def size
29
35
  @items.size
@@ -11,8 +11,8 @@ module Hierclust
11
11
  # Specify +separation+ to stop the clustering process once all the
12
12
  # items are at least +separation+ units apart.
13
13
  def initialize(data, separation = nil)
14
- @data = data.dup
15
14
  @separation = separation
15
+ @data = precluster(data)
16
16
  @distances = Distances.new(@data)
17
17
  end
18
18
 
@@ -43,5 +43,26 @@ module Hierclust
43
43
  [Cluster.new(nearest), *outliers]
44
44
  end
45
45
  end
46
+
47
+ def precluster(points)
48
+ if @separation.nil?
49
+ # can't precluster w/ no min separation given
50
+ return points.dup
51
+ end
52
+ if @separation == 0
53
+ # if no separation is asked for, it's all one cluster
54
+ return [Cluster.new(points)]
55
+ end
56
+ grid_size = @separation / 2.0
57
+ grid_clusters = Hash.new
58
+ points.each do |point|
59
+ grid_x = (point.x / grid_size).floor
60
+ grid_y = (point.y / grid_size).floor
61
+ grid_clusters[grid_x] ||= Hash.new
62
+ grid_clusters[grid_x][grid_y] ||= Cluster.new([])
63
+ grid_clusters[grid_x][grid_y] << point
64
+ end
65
+ grid_clusters.values.map{|h| h.values}.flatten
66
+ end
46
67
  end
47
68
  end
@@ -21,5 +21,27 @@ module Hierclust
21
21
  end
22
22
  @outliers = @items - @nearest
23
23
  end
24
+
25
+ =begin
26
+
27
+ old idea
28
+
29
+ 1 calculate all distances
30
+ 2 update distances when a new cluster is created from two existing points
31
+ 3 keep distances sorted by separation so that we always know which is shortest
32
+
33
+ new idea
34
+
35
+ don't worry about the lower level clusters
36
+ don't worry about the higher level clusters
37
+ just form clusters of the desired separation
38
+ start by dividing the points into a grid of 0.5 * sep
39
+ and put all points in the same grid cells together
40
+ ...
41
+ and then do regular hierarchical clustering! we should be fine at that point.
42
+ sweet....
43
+
44
+ =end
45
+
24
46
  end
25
47
  end
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.2</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.3</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-08 00:00:00 -05:00
12
+ date: 2008-02-10 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15