hierclust 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.1.0 2008-02-01
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Brandt Kurowski <brandt@kurowski.net>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,34 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ config/hoe.rb
7
+ config/requirements.rb
8
+ lib/hierclust.rb
9
+ lib/hierclust/cluster.rb
10
+ lib/hierclust/clusterer.rb
11
+ lib/hierclust/distances.rb
12
+ lib/hierclust/point.rb
13
+ lib/hierclust/version.rb
14
+ log/debug.log
15
+ script/destroy
16
+ script/generate
17
+ script/txt2html
18
+ setup.rb
19
+ spec/hierclust/cluster_spec.rb
20
+ spec/hierclust/clusterer_spec.rb
21
+ spec/hierclust/distances_spec.rb
22
+ spec/hierclust/point_spec.rb
23
+ spec/hierclust_spec.rb
24
+ spec/spec.opts
25
+ spec/spec_helper.rb
26
+ tasks/deployment.rake
27
+ tasks/environment.rake
28
+ tasks/rspec.rake
29
+ tasks/website.rake
30
+ website/index.html
31
+ website/index.txt
32
+ website/javascripts/rounded_corners_lite.inc.js
33
+ website/stylesheets/screen.css
34
+ website/template.rhtml
data/README.txt ADDED
@@ -0,0 +1,14 @@
1
+ = Hierclust
2
+
3
+ A simple hierarchical clustering library for spatial data.
4
+
5
+ == Example
6
+
7
+ require 'hierclust'
8
+ points = (1..6).map { Hierclust::Point.new(rand(10), rand(10)) }
9
+ clusterer = Hierclust::Clusterer.new(points)
10
+ puts clusterer.clusters # => [[[(4, 9), (4, 8)], (9, 6)], [[(1, 4), (3, 1)], (6, 3)]]
11
+
12
+ == Contact
13
+
14
+ Brandt Kurowski <brandt@kurowski.net>
data/Rakefile ADDED
@@ -0,0 +1,4 @@
1
+ require 'config/requirements'
2
+ require 'config/hoe' # setup Hoe + all gem configuration
3
+
4
+ Dir['tasks/**/*.rake'].each { |rake| load rake }
data/config/hoe.rb ADDED
@@ -0,0 +1,71 @@
1
+ require 'hierclust/version'
2
+
3
+ AUTHOR = 'Brandt Kurowski' # can also be an array of Authors
4
+ EMAIL = "brandt@kurowski.net"
5
+ DESCRIPTION = "performs hierarchical clustering on geometric points"
6
+ GEM_NAME = 'hierclust' # what ppl will type to install your gem
7
+ RUBYFORGE_PROJECT = 'hierclust' # The unix name for your project
8
+ HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
9
+ DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
10
+
# Path of the RubyForge user configuration file, and the parsed contents of
# that file (loaded on first use by +rubyforge_username+).
@config_file = "~/.rubyforge/user-config.yml"
@config = nil

# Default RubyForge username. +rubyforge_username+ overwrites this String in
# place, so the literal is duplicated to make sure it is never frozen.
RUBYFORGE_USERNAME = "kurowski".dup

# Loads the RubyForge user configuration (only once) and copies its
# "username" entry into RUBYFORGE_USERNAME by mutating that String in place.
#
# Returns RUBYFORGE_USERNAME. When the configuration file cannot be read or
# parsed, prints setup instructions plus the underlying error to standard
# error and terminates with exit status 1.
def rubyforge_username
  unless @config
    begin
      # The visible code never requires YAML itself, so load it here.
      require 'yaml'
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue StandardError => e
      $stderr.puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      # Report the real cause too: the rescue also covers unreadable or
      # malformed files, not only missing ones.
      $stderr.puts "(#{e.class}: #{e.message})"
      exit 1
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end
29
+
30
+
31
+ REV = nil
32
+ # UNCOMMENT IF REQUIRED:
33
+ # REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
34
+ VERS = Hierclust::VERSION::STRING + (REV ? ".#{REV}" : "")
35
+ RDOC_OPTS = ['--quiet', '--title', 'hierclust documentation',
36
+ "--opname", "index.html",
37
+ "--line-numbers",
38
+ "--main", "README",
39
+ "--inline-source"]
40
+
# Reopens Hoe to filter 'hoe' itself out of the extra dependency list.
class Hoe
  # Removes, in place, every entry of @extra_deps whose name is 'hoe' and
  # returns the remaining list. An entry may be a bare name or an array
  # whose first element is the name.
  def extra_deps
    @extra_deps.delete_if { |dep| Array(dep).first == 'hoe' }
  end
end
47
+
48
+ # Generate all the Rake tasks
49
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
50
+ hoe = Hoe.new(GEM_NAME, VERS) do |p|
51
+ p.author = AUTHOR
52
+ p.description = DESCRIPTION
53
+ p.email = EMAIL
54
+ p.summary = DESCRIPTION
55
+ p.url = HOMEPATH
56
+ p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
57
+ p.test_globs = ["spec/**/*_spec.rb"]
58
+ p.clean_globs |= ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store'] #An array of file patterns to delete on clean.
59
+
60
+ # == Optional
61
+ p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
62
+ #p.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
63
+
64
+ #p.spec_extras = {} # A hash of extra values to set in the gemspec.
65
+
66
+ end
67
+
68
+ CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")
69
+ PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
70
+ hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
71
+ hoe.rsync_args = '-av --delete --ignore-errors'
@@ -0,0 +1,17 @@
require 'fileutils'
include FileUtils

require 'rubygems'

# Load every gem the Rake tasks need. If one is missing, explain how to
# install it and stop with a non-zero status so that callers can tell the
# run failed.
%w[rake hoe newgem rubigen].each do |req_gem|
  begin
    require req_gem
  rescue LoadError
    puts "This Rakefile requires the '#{req_gem}' RubyGem."
    puts "Installation: gem install #{req_gem} -y"
    exit 1
  end
end

# Put the gem's own lib directory at the front of the load path.
$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))

require 'hierclust'
@@ -0,0 +1,43 @@
module Hierclust
  # A Cluster represents a collection of Points. A Cluster has its own
  # coordinates, which are the mean of the coordinates of its items.
  # Because a Cluster has coordinates, it can act as a Point and therefore
  # be included in other Clusters.
  #
  # Coordinates are computed on first access and then cached, so changes
  # made to +items+ afterwards are not reflected in +x+ and +y+.
  class Cluster < Point
    # An array of items in this cluster
    attr_accessor :items

    # Create a Cluster for the given set of +items+.
    def initialize(items)
      @items = items
    end

    # Returns the average x-coordinate of all items in this Cluster as a
    # Float, or +nil+ if the Cluster is empty.
    def x
      return nil if size == 0
      # Sum in floating point: starting from an integer 0 would make
      # integer coordinates use truncating division.
      @x ||= @items.inject(0.0) { |sum, item| sum + item.x } / size
    end

    # Returns the average y-coordinate of all items in this Cluster as a
    # Float, or +nil+ if the Cluster is empty.
    def y
      return nil if size == 0
      @y ||= @items.inject(0.0) { |sum, item| sum + item.y } / size
    end

    # Returns the number of items in this Cluster.
    def size
      @items.size
    end

    # Returns +true+ if this Cluster includes the given +item+, otherwise
    # returns +false+.
    def include?(item)
      @items.include? item
    end

    # Returns a legible representation of this Cluster and its items.
    def to_s
      "[#{@items.join(', ')}]"
    end
  end
end
@@ -0,0 +1,47 @@
module Hierclust
  # Performs hierarchical clustering on a set of Points: the two nearest
  # items are merged into a Cluster over and over, until either a single
  # item is left or the closest remaining items are further apart than a
  # given distance.
  class Clusterer
    # The Distances for the items being clustered
    attr_reader :distances

    # Create a new Clusterer for a copy of the given +data+.
    #
    # Pass +separation+ to stop clustering as soon as the two closest items
    # are more than +separation+ units apart.
    def initialize(data, separation = nil)
      @separation = separation
      @data = data.dup
      @distances = Distances.new(@data)
    end

    # Runs the clustering and returns the resulting array of items.
    def clusters
      return @data if separated?
      until @data.length <= 1
        # Distances are rebuilt on every pass for the current item set.
        @distances = Distances.new(@data)
        break if separated?
        @data = find_cluster
      end
      @data
    end

    private

    # True when a separation limit was given and the current closest pair
    # is further apart than that limit.
    def separated?
      @separation ? @distances.separation > @separation : false
    end

    # Returns the next item set: the nearest pair merged into one Cluster,
    # followed by all other items. Sets of at most two items are wrapped in
    # a single Cluster; an empty set stays empty.
    def find_cluster
      return [] if @data.empty?
      return [Cluster.new(@data[0, 2])] if @data.length <= 2

      merged = Cluster.new(@distances.nearest)
      [merged] + @distances.outliers
    end
  end
end
@@ -0,0 +1,56 @@
module Hierclust
  # Represents the pair-wise distances between a set of items.
  #
  # Items only have to respond to +x+ and +y+. Distances are Euclidean and
  # are cached per unordered pair of objects, keyed by +object_id+.
  class Distances
    # Create a new Distances for the given +items+
    def initialize(items)
      @items = items
      @distances = {}
    end

    # Returns the distance between items +a+ and +b+.
    #
    # The result is cached and the cache is symmetric: <tt>d[a, b]</tt> and
    # <tt>d[b, a]</tt> share one entry. +a+ and +b+ need not belong to the
    # item set; such look-ups never influence +nearest+ or +separation+.
    def [](a, b)
      key = [a.object_id, b.object_id].sort
      @distances[key] ||= Math.sqrt((a.x - b.x) ** 2 + (a.y - b.y) ** 2)
    end

    # Returns the pair of items that are nearest to each other, ordered by
    # ascending +object_id+. Returns an empty array when there are fewer
    # than two items, because no pair exists then.
    def nearest
      @nearest ||= begin
        closest = nearest_pair
        closest ? closest[0] : []
      end
    end

    # Returns all items except the pair that are nearest to each other.
    def outliers
      @outliers ||= @items - nearest
    end

    # Returns the smallest distance between any pair of items, or 0 when
    # there are fewer than two items.
    def separation
      @separation ||= @items.size < 2 ? 0 : nearest_pair[1]
    end

    private

    # Visits every unordered pair of items once and returns
    # <tt>[[a, b], distance]</tt> for the closest pair, or +nil+ when there
    # are fewer than two items. Only pairs drawn from the item set are
    # considered, so unrelated cache entries cannot win. The result is
    # memoized.
    def nearest_pair
      return @nearest_pair if defined?(@nearest_pair)

      best = nil
      @items.each_with_index do |origin, index|
        @items[(index + 1)..-1].each do |other|
          distance = self[origin, other]
          next unless best.nil? || distance < best[1]
          pair = origin.object_id < other.object_id ? [origin, other] : [other, origin]
          best = [pair, distance]
        end
      end
      @nearest_pair = best
    end
  end
end
@@ -0,0 +1,31 @@
module Hierclust
  # A single location in 2-dimensional space.
  class Point
    # The x- and y-coordinates of this Point.
    attr_accessor :x, :y

    # Builds a Point at the given +x+ and +y+ coordinates.
    def initialize(x, y)
      @x, @y = x, y
    end

    # A Point always counts as one item, which lets Points and Clusters be
    # handled the same way.
    def size #:nodoc:
      1
    end

    # Returns this Point formatted as "(x, y)".
    def to_s
      format('(%s, %s)', x, y)
    end

    # Orders Points by their x-coordinate only.
    def <=>(other)
      x <=> other.x
    end
  end
end
@@ -0,0 +1,9 @@
module Hierclust #:nodoc:
  # Version number components and the dotted version string built from them.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 1, 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/hierclust.rb ADDED
@@ -0,0 +1,9 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'hierclust/point'
4
+ require 'hierclust/cluster'
5
+ require 'hierclust/distances'
6
+ require 'hierclust/clusterer'
7
+
8
+ module Hierclust
9
+ end
data/log/debug.log ADDED
File without changes
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.join(File.dirname(__FILE__), '..')
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/destroy'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.join(File.dirname(__FILE__), '..')
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/generate'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Generate.new.run(ARGV)
data/script/txt2html ADDED
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ begin
5
+ require 'newgem'
6
+ rescue LoadError
7
+ puts "\n\nGenerating the website requires the newgem RubyGem"
8
+ puts "Install: gem install newgem\n\n"
9
+ exit(1)
10
+ end
11
+ require 'redcloth'
12
+ require 'syntax/convertors/html'
13
+ require 'erb'
14
+ require File.dirname(__FILE__) + '/../lib/hierclust/version.rb'
15
+
16
+ version = Hierclust::VERSION::STRING
17
+ download = 'http://rubyforge.org/projects/hierclust'
18
+
# Adds English ordinal suffixes to integers.
#
# Integer is reopened instead of Fixnum: on old Rubies Fixnum inherits from
# Integer, and on current Rubies the Fixnum constant no longer exists.
class Integer
  # Returns the English ordinal suffix for this number: 'st', 'nd', 'rd'
  # or 'th'.
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end

class Time
  # Returns this time's date as day with ordinal suffix, full month name
  # and year, e.g. "1st February 2008".
  def pretty
    "#{mday}#{mday.ordinal} #{strftime('%B')} #{year}"
  end
end
38
+
# Converts +source+ to HTML with the Syntax convertor for the +syntax+
# language and strips a <pre> at the start of a line and a </pre> at the end
# of a line from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!,'')
end
42
+
# Command line: source.txt [template.rhtml]. Without a template argument the
# website template next to this script's directory is used.
if ARGV.empty?
  # The usage text goes to stderr because stdout carries the generated HTML
  # (it is meant to be redirected into the output file).
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit(1)
end
src, template = ARGV
template ||= File.join(File.dirname(__FILE__), '/../website/template.rhtml')

# File.read closes the template file once it has been read.
template = ERB.new(File.read(template))

# The template is rendered against this script's binding at the end, so the
# top-level local variables below may be referenced by it; title and body
# are declared here so they outlive the block that fills them in.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line of the source is the title, the rest is the body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Replace every <pre>/<code> element that has a syntax attribute with a
  # numbered placeholder and keep its highlighted HTML aside, presumably so
  # that RedCloth does not touch the highlighted markup.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  # The title is rendered by RedCloth and then stripped of all tags.
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back in place of their placeholders.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)