hierclust 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +34 -0
- data/README.txt +14 -0
- data/Rakefile +4 -0
- data/config/hoe.rb +71 -0
- data/config/requirements.rb +17 -0
- data/lib/hierclust/cluster.rb +43 -0
- data/lib/hierclust/clusterer.rb +47 -0
- data/lib/hierclust/distances.rb +56 -0
- data/lib/hierclust/point.rb +31 -0
- data/lib/hierclust/version.rb +9 -0
- data/lib/hierclust.rb +9 -0
- data/log/debug.log +0 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/spec/hierclust/cluster_spec.rb +52 -0
- data/spec/hierclust/clusterer_spec.rb +167 -0
- data/spec/hierclust/distances_spec.rb +52 -0
- data/spec/hierclust/point_spec.rb +12 -0
- data/spec/hierclust_spec.rb +4 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +20 -0
- data/tasks/website.rake +17 -0
- data/website/index.html +147 -0
- data/website/index.txt +76 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +95 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
|
4
|
+
describe Cluster, " with no points" do
|
5
|
+
before do
|
6
|
+
@c = Cluster.new([])
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should have nil x-coordinate" do
|
10
|
+
@c.x.should be_nil
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should have nil y-coordinate" do
|
14
|
+
@c.y.should be_nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe Cluster, " with one point" do
|
19
|
+
before do
|
20
|
+
@x = 123
|
21
|
+
@y = 234
|
22
|
+
@p = Point.new(@x, @y)
|
23
|
+
@c = Cluster.new([@p])
|
24
|
+
end
|
25
|
+
|
26
|
+
it "should have the same x-coordinate as the point" do
|
27
|
+
@c.x.should == @p.x
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should have the same y-coordinate as the point" do
|
31
|
+
@c.y.should == @p.y
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe Cluster, " with two points" do
|
36
|
+
before do
|
37
|
+
@x_1, @x_2 = 5, 15
|
38
|
+
@y_1, @y_2 = 4, 8
|
39
|
+
@p_1 = Point.new(@x_1, @y_1)
|
40
|
+
@p_2 = Point.new(@x_2, @y_2)
|
41
|
+
@c = Cluster.new([@p_1, @p_2])
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should have x-coordinate at average of point's x-coordinates" do
|
45
|
+
@c.x.should == 10
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should have y-coordinate at average of point's y-coordinates" do
|
49
|
+
@c.y.should == 6
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
|
4
|
+
describe Clusterer, " with no data" do
|
5
|
+
before do
|
6
|
+
@c = Clusterer.new([])
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should return no clusters" do
|
10
|
+
@c.clusters.should == []
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe Clusterer, " with one point" do
|
15
|
+
before do
|
16
|
+
@x = 1
|
17
|
+
@y = 2
|
18
|
+
@p = Point.new(@x, @y)
|
19
|
+
@c = Clusterer.new([@p])
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should return the point" do
|
23
|
+
@c.clusters.should == [@p]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe Clusterer, " with two points" do
|
28
|
+
before do
|
29
|
+
@x_1, @x_2 = 1, 5
|
30
|
+
@y_1, @y_2 = 2, 8
|
31
|
+
@p_1 = Point.new(@x_1, @y_1)
|
32
|
+
@p_2 = Point.new(@x_2, @y_2)
|
33
|
+
@c = Clusterer.new([@p_1, @p_2])
|
34
|
+
end
|
35
|
+
|
36
|
+
it "should return one cluster" do
|
37
|
+
@c.clusters.size.should == 1
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should have two points in the cluster" do
|
41
|
+
@c.clusters.first.size.should == 2
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should have the first point in the cluster" do
|
45
|
+
@c.clusters.first.should include(@p_1)
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should have the second point in the cluster" do
|
49
|
+
@c.clusters.first.should include(@p_2)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe Clusterer, " with three points" do
|
54
|
+
before do
|
55
|
+
@x_1, @x_2, @x_3 = 1, 5, 2
|
56
|
+
@y_1, @y_2, @y_3 = 2, 6, 3
|
57
|
+
@p_1 = Point.new(@x_1, @y_1)
|
58
|
+
@p_2 = Point.new(@x_2, @y_2)
|
59
|
+
@p_3 = Point.new(@x_3, @y_3)
|
60
|
+
@c = Clusterer.new([@p_1, @p_2, @p_3])
|
61
|
+
@cluster = @c.clusters.first
|
62
|
+
@clusters = @cluster.items.sort_by{|c|c.size}
|
63
|
+
end
|
64
|
+
|
65
|
+
it "should return one cluster" do
|
66
|
+
@c.clusters.size.should == 1
|
67
|
+
end
|
68
|
+
|
69
|
+
it "containing two items" do
|
70
|
+
@cluster.items.size.should == 2
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should have one Cluster" do
|
74
|
+
@clusters[1].class.should == Cluster
|
75
|
+
end
|
76
|
+
|
77
|
+
it "and one Point" do
|
78
|
+
@clusters[0].class.should == Point
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have the first and third points in the bigger cluster" do
|
82
|
+
@clusters[1].should include(@p_1, @p_3)
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should have the second point in the smaller cluster" do
|
86
|
+
@clusters[0].should == @p_2
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
FOUR_P = [
|
91
|
+
Point.new(0, 1),
|
92
|
+
Point.new(1, 0),
|
93
|
+
Point.new(3, 4),
|
94
|
+
Point.new(4, 3),
|
95
|
+
]
|
96
|
+
|
97
|
+
describe Clusterer, " with four points and no separation" do
|
98
|
+
before do
|
99
|
+
@c = Clusterer.new(FOUR_P)
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should return one cluster" do
|
103
|
+
@c.clusters.size.should == 1
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe Clusterer, " with four points and separation 1" do
|
108
|
+
before do
|
109
|
+
@c = Clusterer.new(FOUR_P, 1)
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should return all four individual points" do
|
113
|
+
@c.clusters.size.should == 4
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
describe Clusterer, " with four points and separation 2" do
|
118
|
+
before do
|
119
|
+
@c = Clusterer.new(FOUR_P, 2)
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should return two clusters" do
|
123
|
+
@c.clusters.size.should == 2
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
EIGHT_P = [
|
128
|
+
Point.new(0, 1),
|
129
|
+
Point.new(1, 0),
|
130
|
+
Point.new(3, 4),
|
131
|
+
Point.new(4, 3),
|
132
|
+
Point.new(7, 8),
|
133
|
+
Point.new(8, 7),
|
134
|
+
Point.new(8, 9),
|
135
|
+
Point.new(9, 8),
|
136
|
+
]
|
137
|
+
|
138
|
+
describe Clusterer, " with eight points and no separation" do
|
139
|
+
before do
|
140
|
+
@c = Clusterer.new(EIGHT_P)
|
141
|
+
end
|
142
|
+
|
143
|
+
it "should return one cluster when no minimum separation is given" do
|
144
|
+
@c.clusters.size.should == 1
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
describe Clusterer, " with eight points and separation 1" do
|
149
|
+
before do
|
150
|
+
@c = Clusterer.new(EIGHT_P, 1)
|
151
|
+
end
|
152
|
+
|
153
|
+
it "should return all eight individual points" do
|
154
|
+
@c.clusters.size.should == 8
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe Clusterer, " with eight points and separation 2" do
|
159
|
+
before do
|
160
|
+
@c = Clusterer.new(EIGHT_P, 2)
|
161
|
+
end
|
162
|
+
|
163
|
+
it "should return three clusters" do
|
164
|
+
@c.clusters.size.should == 3
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
module Hierclust
|
4
|
+
describe Distances, " with two points" do
|
5
|
+
before do
|
6
|
+
@x_1, @x_2 = 1, 5
|
7
|
+
@y_1, @y_2 = 2, 8
|
8
|
+
@p_1 = Point.new(@x_1, @y_1)
|
9
|
+
@p_2 = Point.new(@x_2, @y_2)
|
10
|
+
@d = Distances.new([@p_1, @p_2])
|
11
|
+
@dist = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should tell us the distance between those points" do
|
15
|
+
@d[@p_1, @p_2].should == @dist
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should tell us the distance between those points backwards" do
|
19
|
+
@d[@p_2, @p_1].should == @dist # arguments swapped so the reverse-order lookup is actually exercised
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should have separation equal to distance between those points" do
|
23
|
+
@d.separation.should == @dist
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe Distances, " with three points" do
|
28
|
+
before do
|
29
|
+
@x_1, @x_2, @x_3 = 1, 5, 3
|
30
|
+
@y_1, @y_2, @y_3 = 2, 8, 4
|
31
|
+
@p_1 = Point.new(@x_1, @y_1)
|
32
|
+
@p_2 = Point.new(@x_2, @y_2)
|
33
|
+
@p_3 = Point.new(@x_3, @y_3)
|
34
|
+
@d = Distances.new([@p_1, @p_2, @p_3])
|
35
|
+
@dist_1_2 = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2)
|
36
|
+
@dist_2_3 = Math.sqrt((@x_2 - @x_3) ** 2 + (@y_2 - @y_3) ** 2)
|
37
|
+
@dist_3_1 = Math.sqrt((@x_3 - @x_1) ** 2 + (@y_3 - @y_1) ** 2)
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should tell us the nearest points" do
|
41
|
+
@d.nearest.should include(@p_1, @p_3)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should tell us the outliers" do
|
45
|
+
@d.outliers.should == [@p_2]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should have separation equal to distance between nearest points" do
|
49
|
+
@d.separation.should == @dist_3_1
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/tasks/deployment.rake
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
desc 'Release the website and new gem version'
|
2
|
+
task :deploy => [:check_version, :website, :release] do
|
3
|
+
puts "Remember to create SVN tag:"
|
4
|
+
puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
|
5
|
+
"svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
|
6
|
+
puts "Suggested comment:"
|
7
|
+
puts "Tagging release #{CHANGES}"
|
8
|
+
end
|
9
|
+
|
10
|
+
desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
|
11
|
+
task :local_deploy => [:website_generate, :install_gem]
|
12
|
+
|
13
|
+
task :check_version do
|
14
|
+
unless ENV['VERSION']
|
15
|
+
puts 'Must pass a VERSION=x.y.z release version'
|
16
|
+
exit
|
17
|
+
end
|
18
|
+
unless ENV['VERSION'] == VERS
|
19
|
+
puts "Please update your version.rb to match the release version, currently #{VERS}"
|
20
|
+
exit
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
desc 'Install the package as a gem, without generating documentation(ri/rdoc)'
|
25
|
+
task :install_gem_no_doc => [:clean, :package] do
|
26
|
+
sh "#{'sudo ' unless Hoe::WINDOZE }gem install pkg/*.gem --no-rdoc --no-ri"
|
27
|
+
end
|
28
|
+
|
29
|
+
namespace :manifest do
|
30
|
+
desc 'Recreate Manifest.txt to include ALL files'
|
31
|
+
task :refresh do
|
32
|
+
`rake check_manifest | patch -p0 > Manifest.txt`
|
33
|
+
end
|
34
|
+
end
|
data/tasks/rspec.rake
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
begin
|
2
|
+
require 'spec/rake/spectask'
|
3
|
+
rescue LoadError
|
4
|
+
begin
|
5
|
+
require 'rubygems'
|
6
|
+
require 'spec/rake/spectask'
|
7
|
+
rescue LoadError
|
8
|
+
puts <<-EOS
|
9
|
+
To use rspec for testing you must install rspec gem:
|
10
|
+
gem install rspec
|
11
|
+
EOS
|
12
|
+
exit(0)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
desc "Run the specs under spec/"
|
17
|
+
Spec::Rake::SpecTask.new do |t|
|
18
|
+
t.spec_opts = ['--options', "spec/spec.opts"]
|
19
|
+
t.spec_files = FileList['spec/**/*_spec.rb'] # recursive glob: also picks up nested specs such as spec/hierclust/*_spec.rb
|
20
|
+
end
|
data/tasks/website.rake
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
desc 'Generate website files'
|
2
|
+
task :website_generate => :ruby_env do
|
3
|
+
(Dir['website/**/*.txt'] - Dir['website/version*.txt']).each do |txt|
|
4
|
+
sh %{ #{RUBY_APP} script/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
8
|
+
desc 'Upload website files to rubyforge'
|
9
|
+
task :website_upload do
|
10
|
+
host = "#{rubyforge_username}@rubyforge.org"
|
11
|
+
remote_dir = "/var/www/gforge-projects/#{PATH}/"
|
12
|
+
local_dir = 'website'
|
13
|
+
sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
|
14
|
+
end
|
15
|
+
|
16
|
+
desc 'Generate and upload website files'
|
17
|
+
task :website => [:website_generate, :website_upload, :publish_docs]
|
data/website/index.html
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
2
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
4
|
+
<head>
|
5
|
+
<link rel="stylesheet" href="stylesheets/screen.css" type="text/css" media="screen" />
|
6
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
7
|
+
<title>
|
8
|
+
Simple Hierarchical Clustering
|
9
|
+
</title>
|
10
|
+
<script src="javascripts/rounded_corners_lite.inc.js" type="text/javascript"></script>
|
11
|
+
<style>
|
12
|
+
|
13
|
+
</style>
|
14
|
+
<script type="text/javascript">
|
15
|
+
window.onload = function() {
|
16
|
+
settings = {
|
17
|
+
tl: { radius: 10 },
|
18
|
+
tr: { radius: 10 },
|
19
|
+
bl: { radius: 10 },
|
20
|
+
br: { radius: 10 },
|
21
|
+
antiAlias: true,
|
22
|
+
autoPad: true,
|
23
|
+
validTags: ["div"]
|
24
|
+
}
|
25
|
+
var versionBox = new curvyCorners(settings, document.getElementById("version"));
|
26
|
+
versionBox.applyCornersToAll();
|
27
|
+
}
|
28
|
+
</script>
|
29
|
+
</head>
|
30
|
+
<body>
|
31
|
+
<div id="main">
|
32
|
+
|
33
|
+
<h1>Simple Hierarchical Clustering</h1>
|
34
|
+
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/hierclust"; return false'>
|
35
|
+
<p>Get Version</p>
|
36
|
+
<a href="http://rubyforge.org/projects/hierclust" class="numbers">0.1.0</a>
|
37
|
+
</div>
|
38
|
+
<h1>→ ‘hierclust’</h1>
|
39
|
+
|
40
|
+
|
41
|
+
<h2>What</h2>
|
42
|
+
|
43
|
+
|
44
|
+
<p>Given a set of points, organizes them into clusters. You can either have it continue clustering until all the clusters are organized into larger clusters, or tell it to stop once a certain minimum level of separation between clusters has been reached.</p>
|
45
|
+
|
46
|
+
|
47
|
+
<p>Useful for taking a large set of points to be plotted on a map, and reducing them to a smaller number of clusters, separated enough so that the map remains legible.</p>
|
48
|
+
|
49
|
+
|
50
|
+
<h2>Installing</h2>
|
51
|
+
|
52
|
+
|
53
|
+
<p><pre class='syntax'><span class="ident">sudo</span> <span class="ident">gem</span> <span class="ident">install</span> <span class="ident">hierclust</span></pre></p>
|
54
|
+
|
55
|
+
|
56
|
+
<h2>The basics</h2>
|
57
|
+
|
58
|
+
|
59
|
+
<p><pre class='syntax'><span class="ident">points</span> <span class="punct">=</span> <span class="punct">(</span><span class="number">1</span><span class="punct">..</span><span class="number">6</span><span class="punct">).</span><span class="ident">map</span> <span class="punct">{</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Point</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">rand</span><span class="punct">(</span><span class="number">10</span><span class="punct">),</span> <span class="ident">rand</span><span class="punct">(</span><span class="number">10</span><span class="punct">))</span> <span class="punct">}</span>
|
60
|
+
<span class="ident">clusterer</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Clusterer</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">points</span><span class="punct">)</span>
|
61
|
+
<span class="ident">puts</span> <span class="ident">clusterer</span><span class="punct">.</span><span class="ident">clusters</span> <span class="comment"># => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]</span></pre></p>
|
62
|
+
|
63
|
+
|
64
|
+
<h2>Demonstration of usage</h2>
|
65
|
+
|
66
|
+
|
67
|
+
<p>Let’s say you have an existing set of objects with latitudes and longitudes, and you want to organize them into clusters that are separated by at least 5 degrees (for simplicity’s sake we’ll pretend that latitude and longitude form a rectangular grid).</p>
|
68
|
+
|
69
|
+
|
70
|
+
<p><pre class='syntax'><span class="ident">require</span> <span class="punct">'</span><span class="string">hierclust</span><span class="punct">'</span></pre></p>
|
71
|
+
|
72
|
+
|
73
|
+
<p>Start by extending the built-in Point class so that it can maintain a reference
|
74
|
+
to your data:</p>
|
75
|
+
|
76
|
+
|
77
|
+
<p><pre class='syntax'><span class="keyword">class </span><span class="class">Hierclust::Point</span>
|
78
|
+
<span class="ident">attr_accessor</span> <span class="symbol">:data</span>
|
79
|
+
<span class="keyword">end</span></pre></p>
|
80
|
+
|
81
|
+
|
82
|
+
<p>Then turn your data into a set of points:</p>
|
83
|
+
|
84
|
+
|
85
|
+
<p><pre class='syntax'><span class="ident">dataset</span> <span class="punct">=</span> <span class="constant">MyGeocodedThing</span><span class="punct">.</span><span class="ident">find</span><span class="punct">(</span><span class="symbol">:all</span><span class="punct">)</span>
|
86
|
+
<span class="ident">points</span> <span class="punct">=</span> <span class="ident">dataset</span><span class="punct">.</span><span class="ident">map</span> <span class="keyword">do</span> <span class="punct">|</span><span class="ident">thing</span><span class="punct">|</span>
|
87
|
+
<span class="ident">point</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Point</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">thing</span><span class="punct">.</span><span class="ident">lon</span><span class="punct">,</span> <span class="ident">thing</span><span class="punct">.</span><span class="ident">lat</span><span class="punct">)</span>
|
88
|
+
<span class="ident">point</span><span class="punct">.</span><span class="ident">data</span> <span class="punct">=</span> <span class="ident">thing</span>
|
89
|
+
<span class="ident">point</span>
|
90
|
+
<span class="keyword">end</span></pre></p>
|
91
|
+
|
92
|
+
|
93
|
+
<p>Then tell Hierclust to cluster those points to at least 5 degrees separation:</p>
|
94
|
+
|
95
|
+
|
96
|
+
<p><pre class='syntax'><span class="ident">clusterer</span> <span class="punct">=</span> <span class="constant">Hierclust</span><span class="punct">::</span><span class="constant">Clusterer</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span><span class="ident">points</span><span class="punct">,</span> <span class="number">5</span><span class="punct">)</span>
|
97
|
+
<span class="ident">clusters</span> <span class="punct">=</span> <span class="ident">clusterer</span><span class="punct">.</span><span class="ident">clusters</span></pre></p>
|
98
|
+
|
99
|
+
|
100
|
+
<p>Then do what you will with your clusters:</p>
|
101
|
+
|
102
|
+
|
103
|
+
<p><pre class='syntax'><span class="ident">map</span> <span class="punct">=</span> <span class="constant">MapThing</span><span class="punct">.</span><span class="ident">new</span>
|
104
|
+
<span class="ident">clusters</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span> <span class="punct">|</span><span class="ident">cluster</span><span class="punct">|</span>
|
105
|
+
<span class="ident">map</span><span class="punct">.</span><span class="ident">add_point</span><span class="punct">(</span>
|
106
|
+
<span class="symbol">:x</span> <span class="punct">=></span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">x</span><span class="punct">,</span>
|
106
|
+
<span class="symbol">:y</span> <span class="punct">=></span> <span class="ident">cluster</span><span class="punct">.</span><span class="ident">y</span><span class="punct">,</span>
|
107
|
+
<span class="symbol">:label</span> <span class="punct">=></span> <span class="punct">"</span><span class="string"><span class="expr">#{cluster.size}</span> Things</span><span class="punct">"</span>
|
109
|
+
<span class="punct">)</span>
|
110
|
+
<span class="keyword">end</span></pre></p>
|
111
|
+
|
112
|
+
|
113
|
+
<h2>Forum</h2>
|
114
|
+
|
115
|
+
|
116
|
+
<p><a href="http://groups.google.com/group/hierclust">http://groups.google.com/group/hierclust</a></p>
|
117
|
+
|
118
|
+
|
119
|
+
<h2>How to submit patches</h2>
|
120
|
+
|
121
|
+
|
122
|
+
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
123
|
+
|
124
|
+
|
125
|
+
<p>The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.</p>
|
126
|
+
|
127
|
+
|
128
|
+
<h2>License</h2>
|
129
|
+
|
130
|
+
|
131
|
+
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
132
|
+
|
133
|
+
|
134
|
+
<h2>Contact</h2>
|
135
|
+
|
136
|
+
|
137
|
+
<p>Comments are welcome. Send an email to <a href="mailto:FIXME"><span class="caps">FIXME</span> full name</a> email via the <a href="http://groups.google.com/group/hierclust">forum</a></p>
|
138
|
+
<p class="coda">
|
139
|
+
<a href="FIXME email">FIXME full name</a>, 2nd February 2008<br />
|
140
|
+
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
141
|
+
</p>
|
142
|
+
</div>
|
143
|
+
|
144
|
+
<!-- insert site tracking codes here, like Google Urchin -->
|
145
|
+
|
146
|
+
</body>
|
147
|
+
</html>
|
data/website/index.txt
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
h1. Simple Hierarchical Clustering
|
2
|
+
|
3
|
+
h1. → 'hierclust'
|
4
|
+
|
5
|
+
h2. What
|
6
|
+
|
7
|
+
Given a set of points, organizes them into clusters. You can either have it continue clustering until all the clusters are organized into larger clusters, or tell it to stop once a certain minimum level of separation between clusters has been reached.
|
8
|
+
|
9
|
+
Useful for taking a large set of points to be plotted on a map, and reducing them to a smaller number of clusters, separated enough so that the map remains legible.
|
10
|
+
|
11
|
+
h2. Installing
|
12
|
+
|
13
|
+
<pre syntax="ruby">sudo gem install hierclust</pre>
|
14
|
+
|
15
|
+
h2. The basics
|
16
|
+
|
17
|
+
<pre syntax="ruby">points = (1..6).map { Hierclust::Point.new(rand(10), rand(10)) }
|
18
|
+
clusterer = Hierclust::Clusterer.new(points)
|
19
|
+
puts clusterer.clusters # => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]</pre>
|
20
|
+
|
21
|
+
h2. Demonstration of usage
|
22
|
+
|
23
|
+
Let's say you have an existing set of objects with latitudes and longitudes, and you want to organize them into clusters that are separated by at least 5 degrees (for simplicity's sake we'll pretend that latitude and longitude form a rectangular grid).
|
24
|
+
|
25
|
+
<pre syntax="ruby">require 'hierclust'</pre>
|
26
|
+
|
27
|
+
Start by extending the built-in Point class so that it can maintain a reference
|
28
|
+
to your data:
|
29
|
+
|
30
|
+
<pre syntax="ruby">class Hierclust::Point
|
31
|
+
attr_accessor :data
|
32
|
+
end</pre>
|
33
|
+
|
34
|
+
Then turn your data into a set of points:
|
35
|
+
|
36
|
+
<pre syntax="ruby">dataset = MyGeocodedThing.find(:all)
|
37
|
+
points = dataset.map do |thing|
|
38
|
+
point = Hierclust::Point.new(thing.lon, thing.lat)
|
39
|
+
point.data = thing
|
40
|
+
point
|
41
|
+
end</pre>
|
42
|
+
|
43
|
+
Then tell Hierclust to cluster those points to at least 5 degrees separation:
|
44
|
+
|
45
|
+
<pre syntax="ruby">clusterer = Hierclust::Clusterer.new(points, 5)
|
46
|
+
clusters = clusterer.clusters</pre>
|
47
|
+
|
48
|
+
Then do what you will with your clusters:
|
49
|
+
|
50
|
+
<pre syntax="ruby">map = MapThing.new
|
51
|
+
clusters.each do |cluster|
|
52
|
+
map.add_point(
|
53
|
+
:x => cluster.x,
|
54
|
+
:y => cluster.y,
|
55
|
+
:label => "#{cluster.size} Things"
|
56
|
+
)
|
57
|
+
end</pre>
|
58
|
+
|
59
|
+
h2. Forum
|
60
|
+
|
61
|
+
"http://groups.google.com/group/hierclust":http://groups.google.com/group/hierclust
|
62
|
+
|
63
|
+
h2. How to submit patches
|
64
|
+
|
65
|
+
Read the "8 steps for fixing other people's code":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/ and for section "8b: Submit patch to Google Groups":http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups, use the Google Group above.
|
66
|
+
|
67
|
+
The trunk repository is <code>svn://rubyforge.org/var/svn/hierclust/trunk</code> for anonymous access.
|
68
|
+
|
69
|
+
h2. License
|
70
|
+
|
71
|
+
This code is free to use under the terms of the MIT license.
|
72
|
+
|
73
|
+
h2. Contact
|
74
|
+
|
75
|
+
Comments are welcome. Send an email to "FIXME full name":mailto:FIXME email via the "forum":http://groups.google.com/group/hierclust
|
76
|
+
|