hierclust 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +34 -0
- data/README.txt +14 -0
- data/Rakefile +4 -0
- data/config/hoe.rb +71 -0
- data/config/requirements.rb +17 -0
- data/lib/hierclust/cluster.rb +43 -0
- data/lib/hierclust/clusterer.rb +47 -0
- data/lib/hierclust/distances.rb +56 -0
- data/lib/hierclust/point.rb +31 -0
- data/lib/hierclust/version.rb +9 -0
- data/lib/hierclust.rb +9 -0
- data/log/debug.log +0 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/spec/hierclust/cluster_spec.rb +52 -0
- data/spec/hierclust/clusterer_spec.rb +167 -0
- data/spec/hierclust/distances_spec.rb +52 -0
- data/spec/hierclust/point_spec.rb +12 -0
- data/spec/hierclust_spec.rb +4 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +20 -0
- data/tasks/website.rake +17 -0
- data/website/index.html +147 -0
- data/website/index.txt +76 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +95 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
  # An empty cluster has no position: both coordinates are expected to be nil.
  describe Cluster, " with no points" do
    before do
      @cluster = Cluster.new([])
    end

    it "should have nil x-coordinate" do
      @cluster.x.should be_nil
    end

    it "should have nil y-coordinate" do
      @cluster.y.should be_nil
    end
  end

  # A cluster holding a single point is expected to sit exactly on that point.
  describe Cluster, " with one point" do
    before do
      @point = Point.new(123, 234)
      @cluster = Cluster.new([@point])
    end

    it "should have the same x-coordinate as the point" do
      @cluster.x.should == @point.x
    end

    it "should have the same y-coordinate as the point" do
      @cluster.y.should == @point.y
    end
  end

  # With two points the cluster is expected at their mean:
  # x = (5 + 15) / 2 = 10 and y = (4 + 8) / 2 = 6.
  describe Cluster, " with two points" do
    before do
      first = Point.new(5, 4)
      second = Point.new(15, 8)
      @cluster = Cluster.new([first, second])
    end

    it "should have x-coordinate at average of point's x-coordinates" do
      @cluster.x.should == 10
    end

    it "should have y-coordinate at average of point's y-coordinates" do
      @cluster.y.should == 6
    end
  end
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
  # No input points: the clusterer is expected to hand back an empty list.
  describe Clusterer, " with no data" do
    before do
      @clusterer = Clusterer.new([])
    end

    it "should return no clusters" do
      @clusterer.clusters.should == []
    end
  end

  # A lone point is expected to come back unchanged, not wrapped in a cluster.
  describe Clusterer, " with one point" do
    before do
      @point = Point.new(1, 2)
      @clusterer = Clusterer.new([@point])
    end

    it "should return the point" do
      @clusterer.clusters.should == [@point]
    end
  end

  # Two points with no minimum separation: expected to merge into one cluster.
  describe Clusterer, " with two points" do
    before do
      @first = Point.new(1, 2)
      @second = Point.new(5, 8)
      @clusterer = Clusterer.new([@first, @second])
    end

    it "should return one cluster" do
      @clusterer.clusters.size.should == 1
    end

    it "should have two points in the cluster" do
      @clusterer.clusters.first.size.should == 2
    end

    it "should have the first point in the cluster" do
      @clusterer.clusters.first.should include(@first)
    end

    it "should have the second point in the cluster" do
      @clusterer.clusters.first.should include(@second)
    end
  end

  # Three points where (1, 2) and (2, 3) are close together and (5, 6) is
  # further away: the expected result is one top-level cluster holding a
  # two-point cluster plus the remaining point.
  describe Clusterer, " with three points" do
    before do
      @first = Point.new(1, 2)
      @second = Point.new(5, 6)
      @third = Point.new(2, 3)
      @clusterer = Clusterer.new([@first, @second, @third])
      @top = @clusterer.clusters.first
      # Sort the top-level items by size so the examples can address the
      # smaller item at index 0 and the bigger one at index 1.
      by_size = @top.items.sort_by { |item| item.size }
      @smaller = by_size[0]
      @bigger = by_size[1]
    end

    it "should return one cluster" do
      @clusterer.clusters.size.should == 1
    end

    it "containing two items" do
      @top.items.size.should == 2
    end

    it "should have one Cluster" do
      @bigger.class.should == Cluster
    end

    it "and one Point" do
      @smaller.class.should == Point
    end

    it "should have the first and third points in the bigger cluster" do
      @bigger.should include(@first, @third)
    end

    it "should have the second point in the smaller cluster" do
      @smaller.should == @second
    end
  end

  # Two tight pairs of points, shared by the three "four points" groups below.
  FOUR_P = [[0, 1], [1, 0], [3, 4], [4, 3]].map { |x, y| Point.new(x, y) }

  describe Clusterer, " with four points and no separation" do
    before do
      @clusterer = Clusterer.new(FOUR_P)
    end

    it "should return one cluster" do
      @clusterer.clusters.size.should == 1
    end
  end

  describe Clusterer, " with four points and separation 1" do
    before do
      @clusterer = Clusterer.new(FOUR_P, 1)
    end

    it "should return all four individual points" do
      @clusterer.clusters.size.should == 4
    end
  end

  describe Clusterer, " with four points and separation 2" do
    before do
      @clusterer = Clusterer.new(FOUR_P, 2)
    end

    it "should return two clusters" do
      @clusterer.clusters.size.should == 2
    end
  end

  # The four points above plus a tighter group of four further out, shared by
  # the three "eight points" groups below.
  EIGHT_P = [
    [0, 1], [1, 0], [3, 4], [4, 3],
    [7, 8], [8, 7], [8, 9], [9, 8]
  ].map { |x, y| Point.new(x, y) }

  describe Clusterer, " with eight points and no separation" do
    before do
      @clusterer = Clusterer.new(EIGHT_P)
    end

    it "should return one cluster when no minimum separation is given" do
      @clusterer.clusters.size.should == 1
    end
  end

  describe Clusterer, " with eight points and separation 1" do
    before do
      @clusterer = Clusterer.new(EIGHT_P, 1)
    end

    it "should return all eight individual points" do
      @clusterer.clusters.size.should == 8
    end
  end

  describe Clusterer, " with eight points and separation 2" do
    before do
      @clusterer = Clusterer.new(EIGHT_P, 2)
    end

    it "should return three clusters" do
      @clusterer.clusters.size.should == 3
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
  # Two points: the distance table must answer lookups in either argument
  # order, and the overall separation is simply the distance between the pair.
  describe Distances, " with two points" do
    before do
      @x_1, @x_2 = 1, 5
      @y_1, @y_2 = 2, 8
      @p_1 = Point.new(@x_1, @y_1)
      @p_2 = Point.new(@x_2, @y_2)
      @d = Distances.new([@p_1, @p_2])
      # Straight-line distance, computed independently of the class under test.
      @dist = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2)
    end

    it "should tell us the distance between those points" do
      @d[@p_1, @p_2].should == @dist
    end

    it "should tell us the distance between those points backwards" do
      # Arguments deliberately reversed: this example exists to check that the
      # lookup is symmetric, so it must not repeat the forward lookup above.
      @d[@p_2, @p_1].should == @dist
    end

    it "should have separation equal to distance between those points" do
      @d.separation.should == @dist
    end
  end

  # Three points: p_1 (1, 2) and p_3 (3, 4) are the closest pair (squared
  # distance 8, against 20 for p_2/p_3 and 52 for p_1/p_2), which leaves
  # p_2 (5, 8) as the outlier.
  describe Distances, " with three points" do
    before do
      @x_1, @x_2, @x_3 = 1, 5, 3
      @y_1, @y_2, @y_3 = 2, 8, 4
      @p_1 = Point.new(@x_1, @y_1)
      @p_2 = Point.new(@x_2, @y_2)
      @p_3 = Point.new(@x_3, @y_3)
      @d = Distances.new([@p_1, @p_2, @p_3])
      # Only the nearest-pair distance is asserted on below.
      @dist_3_1 = Math.sqrt((@x_3 - @x_1) ** 2 + (@y_3 - @y_1) ** 2)
    end

    it "should tell us the nearest points" do
      @d.nearest.should include(@p_1, @p_3)
    end

    it "should tell us the outliers" do
      @d.outliers.should == [@p_2]
    end

    it "should have separation equal to distance between nearest points" do
      @d.separation.should == @dist_3_1
    end
  end
end
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/tasks/deployment.rake
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # Nothing is tagged automatically; the task only prints the svn command and
  # a suggested commit message for the maintainer to run by hand.
  puts "Remember to create SVN tag:"
  trunk_url = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk "
  tag_url = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
  puts "svn copy #{trunk_url}#{tag_url}"
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [:website_generate, :install_gem]
|
12
|
+
|
13
|
+
# Guards a release: requires VERSION=x.y.z on the command line and requires
# it to match VERS. On either failure the task prints the reason and exits
# with a non-zero status, so the calling shell or script sees the aborted
# release as a failure instead of a success.
task :check_version do
  requested = ENV['VERSION']
  unless requested
    puts 'Must pass a VERSION=x.y.z release version'
    exit 1
  end
  unless requested == VERS
    puts "Please update your version.rb to match the release version, currently #{VERS}"
    exit 1
  end
end
|
23
|
+
|
24
|
+
desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
task :install_gem_no_doc => [:clean, :package] do
  # The install command is prefixed with sudo unless Hoe::WINDOZE is set.
  sudo_prefix = Hoe::WINDOZE ? '' : 'sudo '
  sh "#{sudo_prefix}gem install pkg/*.gem --no-rdoc --no-ri"
end
|
28
|
+
|
29
|
+
namespace :manifest do
  desc 'Recreate Manifest.txt to include ALL files'
  task :refresh do
    # Pipes the output of check_manifest through patch and redirects the
    # result into Manifest.txt; the command's own output is discarded.
    %x{rake check_manifest | patch -p0 > Manifest.txt}
  end
end
|
data/tasks/rspec.rake
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Load the RSpec rake task, first directly and then again after requiring
# rubygems. If both attempts fail, print install instructions and exit with
# status 0.
begin
  require 'spec/rake/spectask'
rescue LoadError
  begin
    require 'rubygems'
    require 'spec/rake/spectask'
  rescue LoadError
    puts <<-MESSAGE
To use rspec for testing you must install rspec gem:
gem install rspec
MESSAGE
    exit 0
  end
end
|
15
|
+
|
16
|
+
desc "Run the specs under spec/"
Spec::Rake::SpecTask.new do |t|
  t.spec_opts = ['--options', "spec/spec.opts"]
  # Recursive glob: most specs live in spec/hierclust/, which a flat
  # 'spec/*_spec.rb' pattern would skip. '**/' also matches zero directories,
  # so specs directly under spec/ are still picked up.
  t.spec_files = FileList['spec/**/*_spec.rb']
end
|
data/tasks/website.rake
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
desc 'Generate website files'
task :website_generate => :ruby_env do
  # Every .txt page under website/ except the version*.txt files is run
  # through script/txt2html and written next to its source as .html.
  sources = Dir['website/**/*.txt'] - Dir['website/version*.txt']
  sources.each do |source|
    target = source.gsub(/txt$/, 'html')
    sh %{ #{RUBY_APP} script/txt2html #{source} > #{target} }
  end
end
|
7
|
+
|
8
|
+
desc 'Upload website files to rubyforge'
task :website_upload do
  # rsync the local website/ directory to the project's rubyforge web root.
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh %{rsync -aCv website/ #{destination}}
end
|
15
|
+
|
16
|
+
desc 'Generate and upload website files'
# Umbrella task with no body of its own: it only lists its prerequisites.
task :website => [
  :website_generate,
  :website_upload,
  :publish_docs
]
|
data/website/index.html
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
|
+
<head>
|
5
|
+
<link rel="stylesheet" href="stylesheets/screen.css" type="text/css" media="screen" />
|
6
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
7
|
+
<title>
|
8
|
+
Simple Hierarchical Clustering
|
9
|
+
</title>
|
10
|
+
<script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
|
11
|
+
<style>
|
12
|
+
|
13
|
+
</style>
|
14
|
+
<script type="text/javascript">
|
15
|
+
// Once the page has loaded, round the corners of the "version" box using
// the curvyCorners helper from rounded_corners_lite.inc.js.
window.onload = function() {
  // Declared with var so the options object stays local to this handler
  // instead of leaking onto window as an implicit global.
  var settings = {
    tl: { radius: 10 },
    tr: { radius: 10 },
    bl: { radius: 10 },
    br: { radius: 10 },
    antiAlias: true,
    autoPad: true,
    validTags: ["div"]
  };
  var versionBox = new curvyCorners(settings, document.getElementById("version"));
  versionBox.applyCornersToAll();
}
|
28
|
+
</script>
|
29
|
+
</head>
|
30
|
+
<body>
|
31
|
+
<div id="main">
|
32
|
+
|
33
|
+
<h1>Simple Hierarchical Clustering</h1>
|
34
|
+
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
|
35
|
+
<p>Get Version</p>
|
36
|
+
<a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.0</a>
|
37
|
+
</div>
|
38
|
+
<h1>→ ‘hierclust’</h1>
|
39
|
+
|
40
|
+
|
41
|
+
<h2>What</h2>
|
42
|
+
|
43
|
+
|
44
|
+
<p>Given a set of points, organizes them into clusters. You can either have it continue clustering until all the clusters are organized into larger clusters, or tell it to stop once a certain minimum level of separation between clusters has been reached.</p>
|
45
|
+
|
46
|
+
|
47
|
+
<p>Useful for taking a large set of points to be plotted on a map, and reducing them to a smaller number of clusters, separated enough so that the map remains legible.</p>
|
48
|
+
|
49
|
+
|
50
|
+
<h2>Installing</h2>
|
51
|
+
|
52
|
+
|
53
|
+
<p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">hierclust</span></pre></p>
|
54
|
+
|
55
|
+
|
56
|
+
<h2>The basics</h2>
|
57
|
+
|
58
|
+
|
59
|
+
<p><pre class='syntax'><span class="ident">points</span> <span class="punct">=</span> <span class="punct">(</span><span class="number">1</span><span class="punct">..</span><span class="number">6</span><span class="punct">).</span><span class="ident">map</span> <span class="punct">{</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Point</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">rand</span><span class="punct">(</span><span class="number">10</span><span class="punct">),</span> <span class="ident">rand</span><span class="punct">(</span><span class="number">10</span><span class="punct">))</span> <span class="punct">}</span>
|
60
|
+
<span class="ident">clusterer</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Clusterer</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">points</span><span class="punct">)</span>
|
61
|
+
<span class="ident">puts</span> <span class="ident">clusterer</span><span class="punct">.</span><span class="ident">clusters</span> <span class="comment"># => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]</span></pre></p>
|
62
|
+
|
63
|
+
|
64
|
+
<h2>Demonstration of usage</h2>
|
65
|
+
|
66
|
+
|
67
|
+
<p>Let’s say you have an existing set of objects with latitudes and longitudes, and you want to organize them into clusters that are separated by at least 5 degrees (for simplicity’s sake we’ll pretend that latitudes and longitudes form a rectangular grid).</p>
|
68
|
+
|
69
|
+
|
70
|
+
<p><pre class='syntax'><span class="ident">require</span> <span class="punct">'</span><span class="string">hierclust</span><span class="punct">'</span></pre></p>
|
71
|
+
|
72
|
+
|
73
|
+
<p>Start by extending the built-in Point class so that it can maintain a reference
|
74
|
+
to your data:</p>
|
75
|
+
|
76
|
+
|
77
|
+
<p><pre class='syntax'><span class="keyword">class </span><span class="class">Hierclust::Point</span>
|
78
|
+
<span class="ident">attr_accessor</span> <span class="symbol">:data</span>
|
79
|
+
<span class="keyword">end</span></pre></p>
|
80
|
+
|
81
|
+
|
82
|
+
<p>Then turn your data into a set of points:</p>
|
83
|
+
|
84
|
+
|
85
|
+
<p><pre class='syntax'><span class="ident">dataset</span> <span class="punct">=</span> <span class="constant">MyGeocodedThing</span><span class="punct">.</span><span class="ident">find</span><span class="punct">(</span><span class="symbol">:all</span><span class="punct">)</span>
|
86
|
+
<span class="ident">points</span> <span class="punct">=</span> <span class="ident">dataset</span><span class="punct">.</span><span class="ident">map</span> <span class="keyword">do</span> <span class="punct">|</span><span class="ident">thing</span><span class="punct">|</span>
|
87
|
+
<span class="ident">point</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Point</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">thing</span><span class="punct">.</span><span class="ident">lon</span><span class="punct">,</span> <span class="ident">thing</span><span class="punct">.</span><span class="ident">lat</span><span class="punct">)</span>
|
88
|
+
<span class="ident">point</span><span class="punct">.</span><span class="ident">data</span> <span class="punct">=</span> <span class="ident">thing</span>
|
89
|
+
<span class="ident">point</span>
|
90
|
+
<span class="keyword">end</span></pre></p>
|
91
|
+
|
92
|
+
|
93
|
+
<p>Then tell Hierclust to cluster those points to at least 5 degrees separation:</p>
|
94
|
+
|
95
|
+
|
96
|
+
<p><pre class='syntax'><span class="ident">clusterer</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Clusterer</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">points</span><span class="punct">,</span> <span class="number">5</span><span class="punct">)</span>
|
97
|
+
<span class="ident">clusters</span> <span class="punct">=</span> <span class="ident">clusterer</span><span class="punct">.</span><span class="ident">clusters</span></pre></p>
|
98
|
+
|
99
|
+
|
100
|
+
<p>Then do what you will with your clusters:</p>
|
101
|
+
|
102
|
+
|
103
|
+
<p><pre class='syntax'><span class="ident">map</span> <span class="punct">=</span> <span class="constant">MapThing</span><span class="punct">.</span><span class="ident">new</span>
|
104
|
+
<span class="ident">clusters</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span> <span class="punct">|</span><span class="ident">cluster</span><span class="punct">|</span>
|
105
|
+
<span class="ident">map</span><span class="punct">.</span><span class="ident">add_point</span><span class="punct">(</span>
|
106
|
+
<span class="symbol">:x</span> <span class="punct">=></span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">x</span><span class="punct">,</span>
|
107
|
+
<span class="symbol">:y</span> <span class="punct">=></span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">y</span><span class="punct">,</span>
|
108
|
+
<span class="symbol">:label</span> <span class="punct">=></span> <span class="punct">"</span><span class="string"><span class="expr">#{cluster.size}</span> Things</span><span class="punct">"</span>
|
109
|
+
<span class="punct">)</span>
|
110
|
+
<span class="keyword">end</span></pre></p>
|
111
|
+
|
112
|
+
|
113
|
+
<h2>Forum</h2>
|
114
|
+
|
115
|
+
|
116
|
+
<p><a href="http://groups.google.com/group/hierclust">http://groups.google.com/group/hierclust</a></p>
|
117
|
+
|
118
|
+
|
119
|
+
<h2>How to submit patches</h2>
|
120
|
+
|
121
|
+
|
122
|
+
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
123
|
+
|
124
|
+
|
125
|
+
<p>The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.</p>
|
126
|
+
|
127
|
+
|
128
|
+
<h2>License</h2>
|
129
|
+
|
130
|
+
|
131
|
+
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
132
|
+
|
133
|
+
|
134
|
+
<h2>Contact</h2>
|
135
|
+
|
136
|
+
|
137
|
+
<p>Comments are welcome. Send an email to <a href="mailto:FIXME"><span class="caps">FIXME</span> full name</a> email via the <a href="http://groups.google.com/group/hierclust">forum</a></p>
|
138
|
+
<p class="coda">
|
139
|
+
<a href="FIXME email">FIXME full name</a>, 2nd February 2008<br />
|
140
|
+
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
141
|
+
</p>
|
142
|
+
</div>
|
143
|
+
|
144
|
+
<!-- insert site tracking codes here, like Google Urchin -->
|
145
|
+
|
146
|
+
</body>
|
147
|
+
</html>
|
data/website/index.txt
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
h1. Simple Hierarchical Clustering
|
2
|
+
|
3
|
+
h1. → 'hierclust'
|
4
|
+
|
5
|
+
h2. What
|
6
|
+
|
7
|
+
Given a set of points, organizes them into clusters. You can either have it continue clustering until all the clusters are organized into larger clusters, or tell it to stop once a certain minimum level of separation between clusters has been reached.
|
8
|
+
|
9
|
+
Useful for taking a large set of points to be plotted on a map, and reducing them to a smaller number of clusters, separated enough so that the map remains legible.
|
10
|
+
|
11
|
+
h2. Installing
|
12
|
+
|
13
|
+
<pre syntax="ruby">sudo gem install hierclust</pre>
|
14
|
+
|
15
|
+
h2. The basics
|
16
|
+
|
17
|
+
<pre syntax="ruby">points = (1..6).map { Hierclust::Point.new(rand(10), rand(10)) }
|
18
|
+
clusterer = Hierclust::Clusterer.new(points)
|
19
|
+
puts clusterer.clusters # => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]</pre>
|
20
|
+
|
21
|
+
h2. Demonstration of usage
|
22
|
+
|
23
|
+
Let's say you have an existing set of objects with latitudes and longitudes, and you want to organize them into clusters that are separated by at least 5 degrees (for simplicity's sake we'll pretend that latitudes and longitudes form a rectangular grid).
|
24
|
+
|
25
|
+
<pre syntax="ruby">require 'hierclust'</pre>
|
26
|
+
|
27
|
+
Start by extending the built-in Point class so that it can maintain a reference
|
28
|
+
to your data:
|
29
|
+
|
30
|
+
<pre syntax="ruby">class Hierclust::Point
|
31
|
+
attr_accessor :data
|
32
|
+
end</pre>
|
33
|
+
|
34
|
+
Then turn your data into a set of points:
|
35
|
+
|
36
|
+
<pre syntax="ruby">dataset = MyGeocodedThing.find(:all)
|
37
|
+
points = dataset.map do |thing|
|
38
|
+
point = Hierclust::Point.new(thing.lon, thing.lat)
|
39
|
+
point.data = thing
|
40
|
+
point
|
41
|
+
end</pre>
|
42
|
+
|
43
|
+
Then tell Hierclust to cluster those points to at least 5 degrees separation:
|
44
|
+
|
45
|
+
<pre syntax="ruby">clusterer = Hierclust::Clusterer.new(points, 5)
|
46
|
+
clusters = clusterer.clusters</pre>
|
47
|
+
|
48
|
+
Then do what you will with your clusters:
|
49
|
+
|
50
|
+
<pre syntax="ruby">map = MapThing.new
|
51
|
+
clusters.each do |cluster|
|
52
|
+
map.add_point(
|
53
|
+
:x => cluster.x,
|
54
|
+
:y => cluster.y,
|
55
|
+
:label => "#{cluster.size} Things"
|
56
|
+
)
|
57
|
+
end</pre>
|
58
|
+
|
59
|
+
h2. Forum
|
60
|
+
|
61
|
+
"http://groups.google.com/group/hierclust":http://groups.google.com/group/hierclust
|
62
|
+
|
63
|
+
h2. How to submit patches
|
64
|
+
|
65
|
+
Read the "8 steps for fixing other people's code":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/ and for section "8b: Submit patch to Google Groups":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups, use the Google Group above.
|
66
|
+
|
67
|
+
The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.
|
68
|
+
|
69
|
+
h2. License
|
70
|
+
|
71
|
+
This code is free to use under the terms of the MIT license.
|
72
|
+
|
73
|
+
h2. Contact
|
74
|
+
|
75
|
+
Comments are welcome. Send an email to "FIXME full name":mailto:FIXME email via the "forum":http://groups.google.com/group/hierclust
|
76
|
+
|