Empact-hierclust 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +21 -0
- data/Empact-hierclust.gemspec +82 -0
- data/History.txt +47 -0
- data/LICENSE +20 -0
- data/README.rdoc +28 -0
- data/Rakefile +49 -0
- data/VERSION +1 -0
- data/examples/visualize.rb +42 -0
- data/lib/hierclust.rb +6 -0
- data/lib/hierclust/cluster.rb +61 -0
- data/lib/hierclust/clusterer.rb +70 -0
- data/lib/hierclust/distances.rb +47 -0
- data/lib/hierclust/point.rb +53 -0
- data/log/debug.log +0 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/spec/hierclust/cluster_spec.rb +90 -0
- data/spec/hierclust/clusterer_spec.rb +208 -0
- data/spec/hierclust/distances_spec.rb +44 -0
- data/spec/hierclust/point_spec.rb +22 -0
- data/spec/hierclust_spec.rb +4 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +21 -0
- data/tasks/website.rake +17 -0
- data/website/index.txt +82 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +121 -0
@@ -0,0 +1,53 @@
|
|
1
|
+
module Hierclust
  # A Point represents a single point in n-dimensional space.
  class Point
    # Array holding this Point's coordinates, one entry per dimension.
    attr_accessor :coordinates

    # Create a new Point with the given coordinates.
    #
    # Each argument is one coordinate, e.g. <tt>Point.new(1, 2)</tt>.
    def initialize(*coordinates)
      @coordinates = coordinates
    end

    # Returns the Euclidean distance from this Point to an +other+ Point
    # (or any object that responds to +coordinates+).
    #
    # Raises ArgumentError when +other+ has a different number of
    # coordinates. Array#zip would otherwise silently ignore any extra
    # dimensions of +other+ and return a misleading distance.
    def distance_to(other)
      theirs = other.coordinates
      if theirs.respond_to?(:size) && theirs.size != coordinates.size
        raise ArgumentError,
              "dimension mismatch: #{coordinates.size} vs #{theirs.size} coordinates"
      end

      sum_of_squares = coordinates.zip(theirs).inject(0) do |sum, (mine, their)|
        sum + (their - mine) ** 2
      end
      Math.sqrt(sum_of_squares)
    end

    # Simplifies code by letting us treat Clusters and Points interchangeably
    def size #:nodoc:
      1
    end

    # Simplifies code by letting us treat Clusters and Points interchangeably
    def radius #:nodoc:
      0
    end

    # Simplifies code by letting us treat Clusters and Points interchangeably
    def points #:nodoc:
      [self]
    end

    # Returns a legible representation of this Point, e.g. "(1, 2)".
    def to_s
      "(#{coordinates.join(', ')})"
    end

    # Sorts points relative to each other by comparing their coordinate
    # arrays element by element: the first coordinate (x-axis) decides,
    # and each following coordinate acts as a tie-breaker for the previous
    # ones.
    #
    # Uses object_id as a final tie-breaker, so distinct points with the
    # same coordinates still have a consistent relative order. Only
    # comparing a Point with itself yields 0.
    def <=>(other)
      cmp = coordinates <=> other.coordinates
      cmp = object_id <=> other.object_id if cmp == 0
      cmp
    end
  end
end
|
data/log/debug.log
ADDED
File without changes
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's destroy script for this project.
APP_ROOT = File.join(File.dirname(__FILE__), '..')

# Try a plain require first; only pull in RubyGems when rubigen is not
# already reachable on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before the arguments are handed to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's generate script for this project.
APP_ROOT = File.join(File.dirname(__FILE__), '..')

# Try a plain require first; only pull in RubyGems when rubigen is not
# already reachable on the load path.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before the arguments are handed to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/script/txt2html
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Preamble of the website generator: loads the libraries the script uses and
# sets up the +version+ and +download+ locals that stay in scope for the
# final ERB rendering (the template is evaluated with this script's binding).

require 'rubygems'
begin
  require 'newgem'
rescue LoadError
  puts "\n\nGenerating the website requires the newgem RubyGem"
  puts "Install: gem install newgem\n\n"
  exit(1)
end
require 'redcloth'
require 'syntax/convertors/html'
require 'erb'

# lib/hierclust/version.rb is not among the files shipped with the package
# (the version lives in the top-level VERSION file), so load it only when it
# is actually present instead of aborting with a LoadError.
version_rb = File.dirname(__FILE__) + '/../lib/hierclust/version.rb'
require version_rb if File.exist?(version_rb)

# Resolve VERSION relative to this script rather than the current working
# directory, so the lookup works no matter where the script is invoked from.
version_file = File.join(File.dirname(__FILE__), '..', 'VERSION')
version = File.exist?(version_file) ? File.read(version_file) : ""
download = 'http://rubyforge.org/projects/hierclust'
|
18
|
+
|
19
|
+
# Adds an English ordinal suffix helper to integers.
#
# Defined on Integer rather than Fixnum: Integer is Fixnum's superclass on
# old Rubies and the only integer class on Ruby 2.4+, where the Fixnum
# constant is deprecated and later removed.
class Integer
  # Returns the English ordinal suffix for this number:
  # 'st', 'nd', 'rd' or 'th' (1 -> 'st', 2 -> 'nd', 11 -> 'th', 23 -> 'rd').
  def ordinal
    # teens: 10..19 within any hundred always take 'th' (11th, 112th)
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
|
32
|
+
|
33
|
+
class Time
  # Formats this time as "<day><ordinal suffix> <full month name> <year>".
  # The suffix comes from calling +ordinal+ on the day of the month.
  def pretty
    day = mday
    ["#{day}#{day.ordinal}", strftime('%B'), year].join(' ')
  end
end
|
38
|
+
|
39
|
+
# Converts +source+ to HTML with the Syntax convertor registered for
# +syntax+, then removes any "<pre>" that starts a line and any "</pre>"
# that ends a line from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  markup = convertor.convert(source)
  markup.gsub(%r!^<pre>|</pre>$!,'')
end
|
42
|
+
|
43
|
+
# Command-line handling: the first argument is the source file, the optional
# second argument is the template (defaults to website/template.rhtml next to
# this script's parent directory).
if ARGV.empty?
  # The documented invocation redirects stdout into the generated page, so
  # the usage text goes to stderr. A regular exit with a failure status is
  # used instead of exit!, which terminates immediately without the normal
  # shutdown that flushes buffered output.
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit(1)
end

src, template = ARGV
template ||= File.join(File.dirname(__FILE__), '/../website/template.rhtml')
|
51
|
+
|
52
|
+
# Load the template. File.read closes the file itself, whereas
# File.open(...).read left the handle open until garbage collection.
template = ERB.new(File.read(template))

# NOTE: the locals below (title, body, stat, created, modified) are visible
# to the template through +binding+, so their names must stay as they are.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the title, the remainder is the body.
  title_text = fsrc.readline
  body_text = fsrc.read

  # Swap every <pre>/<code> element that carries a syntax="..." attribute for
  # a "syntax-temp-N" placeholder and keep the converted markup aside, so
  # RedCloth does not process it.
  syntax_items = []
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  # The title is rendered and then stripped of every tag; the body keeps its markup.
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html

  # Put the converted snippets back, dropping any <pre><code> wrapper that
  # ended up around a placeholder.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

# Render the template with this script's local variables and write it to stdout.
$stdout << template.result(binding)
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
# Specs for Hierclust::Cluster: coordinates, radius, points and items for
# clusters built from zero, one and two points, and from a point plus a
# nested cluster.
module Hierclust
  describe Cluster, " with no points" do
    before do
      @c = Cluster.new([])
    end

    # Description matches what is asserted: the whole coordinates value is nil.
    it "should have nil coordinates" do
      @c.coordinates.should be_nil
    end

    it "should have nil radius" do
      @c.radius.should be_nil
    end
  end

  describe Cluster, " with one point" do
    before do
      @x = 123
      @y = 234
      @p = Point.new(@x, @y)
      @c = Cluster.new([@p])
    end

    it "should have the same coordinates as the point" do
      @c.coordinates.should == @p.coordinates
    end

    it "should have the same coordinates as used to create the point" do
      @c.coordinates.should == [@x, @y]
    end

    it "should have 0 radius" do
      @c.radius.should == 0
    end
  end

  describe Cluster, " with two points" do
    before do
      @x_1, @x_2 = 5, 15
      @y_1, @y_2 = 4, 8
      @p_1 = Point.new(@x_1, @y_1)
      @p_2 = Point.new(@x_2, @y_2)
      @c = Cluster.new([@p_1, @p_2])
      @points = @c.points
    end

    it "should have coordinates at the average of points' coordinates" do
      @c.coordinates.should == [10, 6]
    end

    it "should have two points" do
      @points.size.should == 2
    end

    it "should include both points" do
      @points.should include(@p_1, @p_2)
    end

    it "should have correct radius" do
      # Expected radius is half the distance between the two points.
      radius = Math.sqrt((@x_1 - @x_2) ** 2 + (@y_1 - @y_2) ** 2) / 2.0
      @c.radius.should == radius
    end
  end

  describe Cluster, " with one point and one cluster" do
    before do
      @x_1, @x_2, @x_3 = 1, 2, 3
      @y_1, @y_2, @y_3 = 2, 2, 5
      @p_1 = Point.new(@x_1, @y_1)
      @p_2 = Point.new(@x_2, @y_2)
      @p_3 = Point.new(@x_3, @y_3)
      @c_1 = Cluster.new([@p_1, @p_2])
      @c_2 = Cluster.new([@p_3, @c_1])
    end

    it "should have two items" do
      @c_2.items.size.should == 2
    end

    it "should have three points" do
      @c_2.points.size.should == 3
    end

    it "should have coordinates at the average of points' coordinates" do
      @c_2.coordinates.should == [2, 3]
    end
  end
end
|
@@ -0,0 +1,208 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), '..', 'spec_helper.rb')
|
2
|
+
|
3
|
+
# Specs for Hierclust::Clusterer: the clusters returned for inputs of
# zero to eight points, with and without the optional second (separation)
# and third (resolution) constructor arguments.
module Hierclust
  describe Clusterer do
    describe "with no data" do
      before do
        @c = Clusterer.new([])
      end

      it "should return no clusters" do
        @c.clusters.should == []
      end
    end

    describe "with one point" do
      before do
        @x = 1
        @y = 2
        @p = Point.new(@x, @y)
        @c = Clusterer.new([@p])
      end

      it "should return the point" do
        @c.clusters.should == [@p]
      end
    end

    describe "with two points" do
      before do
        @x_1, @x_2 = 1, 5
        @y_1, @y_2 = 2, 8
        @p_1 = Point.new(@x_1, @y_1)
        @p_2 = Point.new(@x_2, @y_2)
        @c = Clusterer.new([@p_1, @p_2])
      end

      it "should return one cluster" do
        @c.clusters.size.should == 1
      end

      it "should have two points in the cluster" do
        @c.clusters.first.size.should == 2
      end

      it "should have the first point in the cluster" do
        @c.clusters.first.should include(@p_1)
      end

      it "should have the second point in the cluster" do
        @c.clusters.first.should include(@p_2)
      end
    end

    describe "with three points" do
      before do
        @x_1, @x_2, @x_3 = 1, 5, 2
        @y_1, @y_2, @y_3 = 2, 6, 3
        @p_1 = Point.new(@x_1, @y_1)
        @p_2 = Point.new(@x_2, @y_2)
        @p_3 = Point.new(@x_3, @y_3)
        @c = Clusterer.new([@p_1, @p_2, @p_3])
        @cluster = @c.clusters.first
        # Smallest item first, so index 0 is the lone Point and index 1 the Cluster.
        @clusters = @cluster.items.sort_by{|c|c.size}
      end

      it "should return one cluster" do
        @c.clusters.size.should == 1
      end

      it "containing two items" do
        @cluster.items.size.should == 2
      end

      it "should have one Cluster" do
        @clusters[1].class.should == Cluster
      end

      it "and one Point" do
        @clusters[0].class.should == Point
      end

      it "should have the first and third points in the bigger cluster" do
        @clusters[1].should include(@p_1, @p_3)
      end

      it "should have the second point in the smaller cluster" do
        @clusters[0].should == @p_2
      end
    end

    describe "with four points" do
      before do
        @points = [
          Point.new(0, 1),
          Point.new(1, 0),
          Point.new(3, 4),
          Point.new(4, 3),
        ]
      end

      describe "and no separation" do
        before do
          @c = Clusterer.new(@points)
        end

        it "should return one cluster" do
          @c.clusters.size.should == 1
        end
      end

      describe "and separation 1" do
        before do
          @c = Clusterer.new(@points, 1)
        end

        it "should return all four individual points" do
          @c.clusters.size.should == 4
        end
      end

      describe "and separation 2" do
        before do
          @c = Clusterer.new(@points, 2)
        end

        it "should return two clusters" do
          @c.clusters.size.should == 2
        end
      end
    end

    describe "with eight points" do
      before do
        @points = [
          Point.new(0, 1),
          Point.new(1, 0),
          Point.new(3, 4),
          Point.new(4, 3),
          Point.new(7, 8),
          Point.new(8, 7),
          Point.new(8, 9),
          Point.new(9, 8),
        ]
      end

      describe "and no separation" do
        before do
          @clusters = Clusterer.new(@points).clusters.sort
        end

        it "should return one cluster when no minimum separation is given" do
          @clusters.size.should == 1
        end
      end

      describe "and separation 1" do
        before do
          @clusters = Clusterer.new(@points, 1).clusters.sort
        end

        it "should have all eight points in individual clusters" do
          @clusters.size.should == 8
        end
      end

      describe "and separation 3" do
        describe "with no resolution limit" do
          before do
            @clusters = Clusterer.new(@points, 3).clusters.sort
          end

          it "should have three clusters" do
            @clusters.size.should == 3
          end

          it "should have clusters size 2, 2, and 4 " do
            @clusters[0].points.size.should == 2
            @clusters[1].points.size.should == 2
            @clusters[2].points.size.should == 4
          end

          it "should have 2 items in large cluster" do
            @clusters[2].items.size.should == 2
          end
        end

        describe "with coarse resolution" do
          before do
            @clusters = Clusterer.new(@points, 3, 5).clusters.sort
          end

          # Descriptions below state what the examples actually assert; they
          # previously repeated the wording of the "no resolution limit" group.
          it "should have two clusters" do
            @clusters.size.should == 2
          end

          it "should have clusters size 4 and 4" do
            @clusters[0].points.size.should == 4
            @clusters[1].points.size.should == 4
          end

          it "should have 4 items in the second cluster" do
            @clusters[1].items.size.should == 4
          end
        end
      end
    end
  end
end
|