hierclust 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.1.3 2008-02-10
2
+
3
+ * 1 performance improvement
4
+ * added linear-time preclustering based on minimum separation distance
5
+ * 1 major change:
6
+ * when minimum separation is given, the clusterer will no longer calculate
7
+ and return clusters smaller than "separation / 2.0"
8
+
1
9
  == 0.1.2 2008-02-07
2
10
 
3
11
  * 1 performance improvement
@@ -24,6 +24,12 @@ module Hierclust
24
24
  @y ||= @items.inject(0) {|sum, p| sum + p.y} / size
25
25
  end
26
26
 
27
# Add an +item+ to this Cluster.
#
# Returns the Cluster itself (not the internal items array), so appends can
# be chained -- <tt>cluster << a << b</tt> -- the same way Array#<< behaves,
# and so callers are never handed the mutable internal array.
def <<(item)
  # Flush cached pseudo-attributes: the coordinates are memoized with ||=,
  # so they must be reset whenever the membership changes.
  @x = @y = nil
  @items << item
  self
end
32
+
27
33
  # Returns the number of items in this Cluster.
28
34
  def size
29
35
  @items.size
@@ -11,8 +11,8 @@ module Hierclust
11
11
  # Specify +separation+ to stop the clustering process once all the
12
12
  # items are at least +separation+ units apart.
13
13
  def initialize(data, separation = nil)
14
- @data = data.dup
15
14
  @separation = separation
15
+ @data = precluster(data)
16
16
  @distances = Distances.new(@data)
17
17
  end
18
18
 
@@ -43,5 +43,26 @@ module Hierclust
43
43
  [Cluster.new(nearest), *outliers]
44
44
  end
45
45
  end
46
+
47
# Groups +points+ into coarse grid cells before the regular hierarchical
# clustering runs.
#
# Behaviour depends on <tt>@separation</tt>:
# * +nil+ -- no minimum separation was given, so nothing can be
#   preclustered; a shallow copy of +points+ is returned.
# * zero -- everything belongs to one Cluster (empty input yields an empty
#   list instead of a Cluster with no items).
# * otherwise -- points are bucketed into square cells whose side is
#   <tt>@separation / 2.0</tt>, producing one Cluster per occupied cell.
#
# Each point must respond to +x+ and +y+. Returns an Array; the +points+
# array passed in is neither stored nor mutated.
def precluster(points)
  # can't precluster w/ no min separation given
  return points.dup if @separation.nil?
  # nothing to group; every branch now answers [] for empty input
  return [] if points.empty?
  # if no separation is asked for, it's all one cluster. Copy the array so
  # the Cluster does not share the caller's storage. This guard also keeps
  # grid_size below from ever being zero.
  return [Cluster.new(points.dup)] if @separation == 0

  grid_size = @separation / 2.0
  # grid_x => { grid_y => Cluster }, rows created on first access
  grid_clusters = Hash.new { |rows, grid_x| rows[grid_x] = {} }
  points.each do |point|
    grid_x = (point.x / grid_size).floor
    grid_y = (point.y / grid_size).floor
    (grid_clusters[grid_x][grid_y] ||= Cluster.new([])) << point
  end
  # Remove exactly one level of nesting. An unbounded flatten would also
  # expand any element that happens to respond to to_ary.
  grid_clusters.values.flat_map(&:values)
end
46
67
  end
47
68
  end
@@ -21,5 +21,27 @@ module Hierclust
21
21
  end
22
22
  @outliers = @items - @nearest
23
23
  end
24
+
25
+ =begin
26
+
27
+ old idea
28
+
29
+ 1 calculate all distances
30
+ 2 update distances when a new cluster is created from two existing points
31
+ 3 keep distances sorted by separation so that we always know which is shortest
32
+
33
+ new idea
34
+
35
+ don't worry about the lower level clusters
36
+ don't worry about the higher level clusters
37
+ just form clusters of the desired separation
38
+ start by dividing the points into a grid of 0.5 * sep
39
+ and put all points in the same grid cells together
40
+ ...
41
+ and then do regular hierarchical clustering! we should be fine at that point.
42
+ sweet....
43
+
44
+ =end
45
+
24
46
  end
25
47
  end
@@ -2,7 +2,7 @@ module Hierclust #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 2
5
+ TINY = 3
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>Simple Hierarchical Clustering</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.2</a>
36
+ <a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.3</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;hierclust&#8217;</h1>
39
39
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hierclust
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brandt Kurowski
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-02-08 00:00:00 -05:00
12
+ date: 2008-02-10 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15