hierclust 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +34 -0
- data/README.txt +14 -0
- data/Rakefile +4 -0
- data/config/hoe.rb +71 -0
- data/config/requirements.rb +17 -0
- data/lib/hierclust/cluster.rb +43 -0
- data/lib/hierclust/clusterer.rb +47 -0
- data/lib/hierclust/distances.rb +56 -0
- data/lib/hierclust/point.rb +31 -0
- data/lib/hierclust/version.rb +9 -0
- data/lib/hierclust.rb +9 -0
- data/log/debug.log +0 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/spec/hierclust/cluster_spec.rb +52 -0
- data/spec/hierclust/clusterer_spec.rb +167 -0
- data/spec/hierclust/distances_spec.rb +52 -0
- data/spec/hierclust/point_spec.rb +12 -0
- data/spec/hierclust_spec.rb +4 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +20 -0
- data/tasks/website.rake +17 -0
- data/website/index.html +147 -0
- data/website/index.txt +76 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +95 -0
data/History.txt
ADDED
data/License.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Brandt Kurowski <brandt@kurowski.net>
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
History.txt
|
2
|
+
License.txt
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
config/hoe.rb
|
7
|
+
config/requirements.rb
|
8
|
+
lib/hierclust.rb
|
9
|
+
lib/hierclust/cluster.rb
|
10
|
+
lib/hierclust/clusterer.rb
|
11
|
+
lib/hierclust/distances.rb
|
12
|
+
lib/hierclust/point.rb
|
13
|
+
lib/hierclust/version.rb
|
14
|
+
log/debug.log
|
15
|
+
script/destroy
|
16
|
+
script/generate
|
17
|
+
script/txt2html
|
18
|
+
setup.rb
|
19
|
+
spec/hierclust/cluster_spec.rb
|
20
|
+
spec/hierclust/clusterer_spec.rb
|
21
|
+
spec/hierclust/distances_spec.rb
|
22
|
+
spec/hierclust/point_spec.rb
|
23
|
+
spec/hierclust_spec.rb
|
24
|
+
spec/spec.opts
|
25
|
+
spec/spec_helper.rb
|
26
|
+
tasks/deployment.rake
|
27
|
+
tasks/environment.rake
|
28
|
+
tasks/rspec.rake
|
29
|
+
tasks/website.rake
|
30
|
+
website/index.html
|
31
|
+
website/index.txt
|
32
|
+
website/javascripts/rounded_corners_lite.inc.js
|
33
|
+
website/stylesheets/screen.css
|
34
|
+
website/template.rhtml
|
data/README.txt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
= Hierclust
|
2
|
+
|
3
|
+
A simple hierarchical clustering library for spatial data.
|
4
|
+
|
5
|
+
== Example
|
6
|
+
|
7
|
+
require 'hierclust'
|
8
|
+
points = (1..6).map { Hierclust::Point.new(rand(10), rand(10)) }
|
9
|
+
clusterer = Hierclust::Clusterer.new(points)
|
10
|
+
puts clusterer.clusters  # => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]
|
11
|
+
|
12
|
+
== Contact
|
13
|
+
|
14
|
+
Brandt Kurowski <brandt@kurowski.net>
|
data/Rakefile
ADDED
data/config/hoe.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'hierclust/version'

# Gem metadata consumed by the Hoe specification in this file.
AUTHOR            = 'Brandt Kurowski' # may also be an array of authors
EMAIL             = "brandt@kurowski.net"
DESCRIPTION       = "performs hierarchical clustering on geometric points"
GEM_NAME          = 'hierclust' # the name used with `gem install`
RUBYFORGE_PROJECT = 'hierclust' # unix name of the RubyForge project
HOMEPATH          = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH     = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

# Location and lazily-loaded contents of the RubyForge user configuration.
@config_file = "~/.rubyforge/user-config.yml"
@config      = nil

# Default account name. rubyforge_username overwrites this string in place,
# so it must stay unfrozen.
RUBYFORGE_USERNAME = "kurowski"
|
14
|
+
# Loads the RubyForge user configuration from @config_file (only the first
# time it is needed) and overwrites RUBYFORGE_USERNAME in place with the
# configuration's "username" entry.
#
# If the configuration file cannot be read, an explanation is printed and
# the process exits with a failure status. Other problems (for example a
# malformed YAML document) are no longer reported as a missing file; they
# propagate with their own error message.
def rubyforge_username
  unless @config
    begin
      # Loaded here so this method does not rely on another library having
      # pulled in YAML already.
      require 'yaml'
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue SystemCallError
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # Non-zero status so callers (rake, shells) see the failure.
      exit 1
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end
|
29
|
+
|
30
|
+
|
31
|
+
# Optional revision suffix appended to the gem version.
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
VERS = "#{Hierclust::VERSION::STRING}#{".#{REV}" if REV}"

RDOC_OPTS = [
  '--quiet',
  '--title', 'hierclust documentation',
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source"
]

class Hoe
  # Returns the extra dependency list after removing any entry whose name
  # is 'hoe'. The stored list itself is modified.
  def extra_deps
    @extra_deps.delete_if { |dep| Array(dep).first == 'hoe' }
  end
end

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |project|
  project.author         = AUTHOR
  project.description    = DESCRIPTION
  project.email          = EMAIL
  project.summary        = DESCRIPTION
  project.url            = HOMEPATH
  project.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  project.test_globs     = ["spec/**/*_spec.rb"]
  # File patterns to delete on clean.
  project.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']

  # == Optional
  project.changes = project.paragraphs_of("History.txt", 0..1).join("\n\n")
  # project.extra_deps = []  # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  # project.spec_extras = {} # A hash of extra values to set in the gemspec.
end

# Note: the separator below is a literal backslash followed by "n", twice.
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")

PATH = if RUBYFORGE_PROJECT == GEM_NAME
         RUBYFORGE_PROJECT
       else
         "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
       end

# The rdoc directory is PATH with the leading project name stripped.
relative_path = PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/, '')
hoe.remote_rdoc_dir = File.join(relative_path, 'rdoc')
hoe.rsync_args = '-av --delete --ignore-errors'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'fileutils'
include FileUtils

require 'rubygems'

# Load every gem the Rakefile depends on. If one is missing, print how to
# install it and stop with a failure status so the calling shell or rake
# invocation does not see the aborted run as a success.
%w[rake hoe newgem rubigen].each do |req_gem|
  begin
    require req_gem
  rescue LoadError
    puts "This Rakefile requires the '#{req_gem}' RubyGem."
    puts "Installation: gem install #{req_gem} -y"
    exit 1
  end
end

# Put the project's lib directory first on the load path so the local
# hierclust sources are the ones loaded below.
$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))

require 'hierclust'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Hierclust
  # A Cluster represents a collection of Points. A Cluster has its own
  # coordinates that are the mean of the coordinates of its points.
  # Because a Cluster has coordinates, it can act as a Point and therefore
  # be included in other Clusters.
  #
  # The mean coordinates are computed on first use and then cached.
  # Assigning a new array through #items= clears the cache; modifying the
  # existing array in place does not.
  class Cluster < Point
    # An array of items in this cluster
    attr_reader :items

    # Create a Cluster for the given set of +items+.
    def initialize(items)
      @items = items
    end

    # Replaces the items of this Cluster and discards the cached mean
    # coordinates so they are recomputed from the new items.
    def items=(items)
      @x = nil
      @y = nil
      @items = items
    end

    # Returns the average x-coordinate of all items in this Cluster, or
    # +nil+ if the Cluster is empty.
    def x
      return nil if size == 0
      # Divide by a Float: integer coordinates would otherwise be truncated
      # by integer division and the result would not be the mean.
      @x ||= @items.inject(0) { |sum, item| sum + item.x } / size.to_f
    end

    # Returns the average y-coordinate of all items in this Cluster, or
    # +nil+ if the Cluster is empty.
    def y
      return nil if size == 0
      @y ||= @items.inject(0) { |sum, item| sum + item.y } / size.to_f
    end

    # Returns the number of items in this Cluster.
    def size
      @items.size
    end

    # Returns +true+ if this Cluster includes the given +item+, otherwise
    # returns +false+.
    def include?(item)
      @items.include? item
    end

    # Returns a legible representation of this Cluster and its items.
    def to_s
      "[#{@items.join(', ')}]"
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module Hierclust
  # Clusters a set of Points using Hierarchical Clustering, stopping either
  # when the hierarchy is complete or the clusters are separated by a given
  # minimum distance.
  class Clusterer
    # The Distances for the items being clustered
    attr_reader :distances

    # Create a new Clusterer for the given data.
    #
    # Specify +separation+ to stop the clustering process once all the
    # items are at least +separation+ units apart.
    def initialize(data, separation = nil)
      @data = data.dup
      @separation = separation
      @distances = Distances.new(@data)
    end

    # Calculates and returns the set of clusters.
    #
    # Each pass rebuilds the Distances for the current items, stops if the
    # closest pair is already further apart than the requested separation,
    # and otherwise merges the closest pair into a new Cluster. The result
    # replaces the internal data, so later calls continue from it.
    def clusters
      # The separation test lives only inside the loop. Testing it before
      # the loop as well repeated the whole pairwise pass on the same items.
      while @data.length > 1
        @distances = Distances.new(@data)
        return @data if @separation && @distances.separation > @separation
        @data = find_cluster
      end
      @data
    end

    private

    # Returns the current items with the two nearest ones replaced by a
    # single Cluster. Zero, one and two items are handled without
    # consulting the distances.
    def find_cluster
      case @data.length
      when 0 then []
      when 1 then [Cluster.new([@data[0]])]
      when 2 then [Cluster.new([@data[0], @data[1]])]
      else [Cluster.new(@distances.nearest), *@distances.outliers]
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Hierclust
  # Represents the pair-wise distances between a set of items.
  #
  # Items are any objects that respond to +x+ and +y+. Distances are
  # Euclidean, computed on demand and cached per pair of objects (by
  # object identity), so later changes to an item's coordinates are not
  # reflected in distances that have already been computed.
  class Distances
    # Create a new Distances for the given +items+
    def initialize(items)
      @items = items
      @distances = {}
    end

    # Returns the distance between items +a+ and +b+. The order of the
    # arguments does not matter.
    def [](a, b)
      @distances[pair_key(a, b)] ||=
        Math.sqrt((a.x - b.x)**2 + (a.y - b.y)**2)
    end

    # Returns the pair of items that are nearest to each other, or an
    # empty array when there are fewer than two items.
    def nearest
      @nearest ||= begin
        pair = nearest_pair
        if pair
          # The key holds the two object ids; map them back to the items.
          pair.first.map { |id| @items.find { |item| item.object_id == id } }
        else
          []
        end
      end
    end

    # Returns all items except the pair that are nearest to each other.
    def outliers
      @outliers ||= @items - nearest
    end

    # Returns the smallest distance between any pair of items, or 0 when
    # there are fewer than two items.
    def separation
      @separation ||= @items.size < 2 ? 0 : nearest_pair[1]
    end

    private

    # Returns the cache entry +[key, distance]+ with the smallest distance,
    # or +nil+ when no pair exists. The result is memoized.
    def nearest_pair
      return @nearest_pair if defined?(@nearest_pair)
      prepopulate_cache
      # A single pass for the minimum; sorting every pair is unnecessary.
      @nearest_pair = @distances.min_by { |_key, distance| distance }
    end

    # Computes and caches the distance for every pair of items.
    def prepopulate_cache
      @items.combination(2) { |a, b| self[a, b] }
    end

    # Cache key for a pair of items: their object ids in ascending order,
    # so that (a, b) and (b, a) share one entry.
    def pair_key(a, b)
      [a.object_id, b.object_id].sort
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Hierclust
  # A Point represents a single point in 2-dimensional space.
  class Point
    # x-coordinate
    attr_accessor :x

    # y-coordinate
    attr_accessor :y

    # Create a new Point with the given x- and y-coordinates.
    def initialize(x, y)
      @x = x
      @y = y
    end

    # Simplifies code by letting us treat Clusters and Points interchangeably
    def size #:nodoc:
      1
    end

    # Returns a legible representation of this Point.
    def to_s
      "(#{x}, #{y})"
    end

    # Sorts points relative to each other on the x-axis.
    #
    # Returns +nil+ when +other+ has no x-coordinate to compare against,
    # following the usual <=> convention for incomparable objects instead
    # of raising NoMethodError.
    def <=>(other)
      return nil unless other.respond_to?(:x)
      x <=> other.x
    end
  end
end
|
data/lib/hierclust.rb
ADDED
data/log/debug.log
ADDED
File without changes
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's destroy script with the command-line arguments.
APP_ROOT = File.join(File.dirname(__FILE__), '..')

# Try rubigen from the current load path first; load RubyGems and retry
# only if that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Runs RubiGen's generate script with the command-line arguments.
APP_ROOT = File.join(File.dirname(__FILE__), '..')

# Try rubigen from the current load path first; load RubyGems and retry
# only if that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before the arguments are handed over.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV[0])

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
|
data/script/txt2html
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'rubygems'

# newgem is required for website generation; explain and stop if missing.
begin
  require 'newgem'
rescue LoadError
  puts "\n\nGenerating the website requires the newgem RubyGem"
  puts "Install: gem install newgem\n\n"
  exit(1)
end

%w[redcloth syntax/convertors/html erb].each { |library| require library }
require "#{File.dirname(__FILE__)}/../lib/hierclust/version.rb"

# Top-level locals; the ERB template is rendered with this script's binding
# at the end of the file, so they are visible to it.
version = Hierclust::VERSION::STRING
download = 'http://rubyforge.org/projects/hierclust'
|
18
|
+
|
19
|
+
# Adds an English ordinal suffix helper to integers.
#
# Defined on Integer rather than Fixnum: Fixnum is a subclass of Integer on
# older Rubies (so those values still respond to #ordinal) and no longer
# exists on current ones.
class Integer
  # Returns the English ordinal suffix for this number: 'st', 'nd', 'rd'
  # or 'th' (e.g. 1 -> 'st', 22 -> 'nd', 13 -> 'th').
  def ordinal
    # teens (and 10): always 'th', regardless of the last digit
    return 'th' if (10..19).include?(self % 100)

    # others: decided by the last digit
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
|
32
|
+
|
33
|
+
class Time
  # Formats this time as day-of-month with its ordinal suffix, the full
  # month name and the year, separated by single spaces.
  def pretty
    day = mday
    ["#{day}#{day.ordinal}", strftime('%B'), year].join(' ')
  end
end
|
38
|
+
|
39
|
+
# Converts +source+ to HTML with the Syntax convertor for the given
# +syntax+, then removes a "<pre>" at the start of a line and a "</pre>"
# at the end of a line from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  html = convertor.convert(source)
  html.gsub(%r!^<pre>|</pre>$!, '')
end
|
42
|
+
|
43
|
+
# Usage: txt2html source.txt [template.rhtml] > output.html
#
# Renders the source file through RedCloth and writes the result of the
# ERB template to standard output. The template defaults to
# website/template.rhtml relative to this script.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.join(File.dirname(__FILE__), '/../website/template.rhtml')
else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the file once it has been read; File.open(...).read left
# the handle open.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the title, the remainder is the body.
  title_text = fsrc.readline
  body_text = fsrc.read

  # Replace each <pre>/<code> element carrying a syntax attribute with a
  # placeholder, keeping the highlighted HTML aside in syntax_items.
  syntax_items = []
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html

  # Put the highlighted HTML back in place of the placeholders.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end

# These locals are visible to the template through the binding below.
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)
|