hierclust 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -0
- data/lib/hierclust/cluster.rb +6 -0
- data/lib/hierclust/clusterer.rb +22 -1
- data/lib/hierclust/distances.rb +22 -0
- data/lib/hierclust/version.rb +1 -1
- data/website/index.html +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
== 0.1.3 2008-02-10
|
2
|
+
|
3
|
+
* 1 performance improvement
|
4
|
+
* added linear-time preclustering based on minimum separation distance
|
5
|
+
* 1 major change:
|
6
|
+
* when minimum separation is given, the clusterer will no longer calculate
|
7
|
+
and return clusters smaller than "separation / 2.0"
|
8
|
+
|
1
9
|
== 0.1.2 2008-02-07
|
2
10
|
|
3
11
|
* 1 performance improvement
|
data/lib/hierclust/cluster.rb
CHANGED
@@ -24,6 +24,12 @@ module Hierclust
|
|
24
24
|
@y ||= @items.inject(0) {|sum, p| sum + p.y} / size
|
25
25
|
end
|
26
26
|
|
27
|
+
# Add an +item+ to this Cluster.
|
28
|
+
def <<(item)
|
29
|
+
@x, @y = nil, nil # flush cached pseudo-attributes
|
30
|
+
@items << item
|
31
|
+
end
|
32
|
+
|
27
33
|
# Returns the number of items in this Cluster.
|
28
34
|
def size
|
29
35
|
@items.size
|
data/lib/hierclust/clusterer.rb
CHANGED
@@ -11,8 +11,8 @@ module Hierclust
|
|
11
11
|
# Specify +separation+ to stop the clustering process once all the
|
12
12
|
# items are at least +separation+ units apart.
|
13
13
|
def initialize(data, separation = nil)
|
14
|
-
@data = data.dup
|
15
14
|
@separation = separation
|
15
|
+
@data = precluster(data)
|
16
16
|
@distances = Distances.new(@data)
|
17
17
|
end
|
18
18
|
|
@@ -43,5 +43,26 @@ module Hierclust
|
|
43
43
|
[Cluster.new(nearest), *outliers]
|
44
44
|
end
|
45
45
|
end
|
46
|
+
|
47
|
+
def precluster(points)
|
48
|
+
if @separation.nil?
|
49
|
+
# can't precluster w/ no min separation given
|
50
|
+
return points.dup
|
51
|
+
end
|
52
|
+
if @separation == 0
|
53
|
+
# if no separation is asked for, it's all one cluster
|
54
|
+
return [Cluster.new(points)]
|
55
|
+
end
|
56
|
+
grid_size = @separation / 2.0
|
57
|
+
grid_clusters = Hash.new
|
58
|
+
points.each do |point|
|
59
|
+
grid_x = (point.x / grid_size).floor
|
60
|
+
grid_y = (point.y / grid_size).floor
|
61
|
+
grid_clusters[grid_x] ||= Hash.new
|
62
|
+
grid_clusters[grid_x][grid_y] ||= Cluster.new([])
|
63
|
+
grid_clusters[grid_x][grid_y] << point
|
64
|
+
end
|
65
|
+
grid_clusters.values.map{|h| h.values}.flatten
|
66
|
+
end
|
46
67
|
end
|
47
68
|
end
|
data/lib/hierclust/distances.rb
CHANGED
@@ -21,5 +21,27 @@ module Hierclust
|
|
21
21
|
end
|
22
22
|
@outliers = @items - @nearest
|
23
23
|
end
|
24
|
+
|
25
|
+
=begin
|
26
|
+
|
27
|
+
old idea
|
28
|
+
|
29
|
+
1 calculate all distances
|
30
|
+
2 update distances when a new cluster is created from two existing points
|
31
|
+
3 keep distances sorted by separation so that we always know which is shortest
|
32
|
+
|
33
|
+
new idea
|
34
|
+
|
35
|
+
don't worry about the lower level clusters
|
36
|
+
don't worry about the higher level clusters
|
37
|
+
just form clusters of the desired separation
|
38
|
+
start by dividing the points into a grid of 0.5 * sep
|
39
|
+
and put all points in the same grid cells together
|
40
|
+
...
|
41
|
+
and then do regular hierarchical clustering! we should be fine at that point.
|
42
|
+
sweet....
|
43
|
+
|
44
|
+
=end
|
45
|
+
|
24
46
|
end
|
25
47
|
end
|
data/lib/hierclust/version.rb
CHANGED
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>Simple Hierarchical Clustering</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.2</a>
|
36
|
+
<a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.3</a>
|
37
37
|
</div>
|
38
38
|
<h1>→ ‘hierclust’</h1>
|
39
39
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hierclust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brandt Kurowski
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-02-07 00:00:00 -05:00
|
12
|
+
date: 2008-02-10 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|