clusterable 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
# Top-level namespace of the clusterable gem.
module Clusterable
  # Version string of this release.
  VERSION = '0.0.1'

  # Constants that are loaded lazily: the file on the right is only required
  # the first time the constant on the left is referenced.
  [
    [:Point,   'clusterable/point'],
    [:Cluster, 'clusterable/cluster']
  ].each do |constant, path|
    autoload constant, path
  end
end
@@ -0,0 +1,85 @@
1
module Clusterable
  # A group of points together with the point at their center.
  #
  # A cluster always starts out with at least one point. Its center is either
  # supplied by the caller or calculated as the arithmetic mean of the
  # latitudes and longitudes of its points.
  class Cluster
    attr_accessor :points, :center

    # Builds a cluster.
    #
    # points - Array of objects responding to #latitude and #longitude.
    # center - Optional center point; calculated from +points+ when nil.
    #
    # Raises RuntimeError when +points+ is empty.
    def initialize(points = [], center = nil)
      raise "can't initialize an empty cluster" if points.empty?

      self.points = points
      self.center = center || calculate_center
    end

    # Groups +points+ into clusters using k-means.
    # See: http://en.wikipedia.org/wiki/K-means_clustering
    #
    # points          - Array of points responding to #latitude, #longitude
    #                   and #distance_to.
    # sample_size     - Number of clusters to seed; each seed is a randomly
    #                   picked point that serves as the initial center.
    # distance_cutoff - Iteration stops once no center moved this far or
    #                   further during a pass. Must be greater than zero:
    #                   the biggest shift is never negative, so a cutoff of
    #                   zero or less can never be undercut.
    #
    # Returns the Array of clusters (empty when no seed could be picked).
    def self.kmeans(points, sample_size, distance_cutoff)
      clusters = pick_random(points, sample_size).map { |point| new([point], point) }

      # Without any seed there is nothing to assign points to.
      return clusters if clusters.empty?

      loop do
        assignments = Array.new(clusters.size) { [] }

        # Assign every point to the cluster with the nearest center. On a
        # tie the cluster that comes first wins.
        points.each do |point|
          _, nearest = clusters.each_with_index.min_by do |cluster, _index|
            point.distance_to(cluster.center)
          end

          assignments[nearest] << point
        end

        # Move each center to the mean of its newly assigned points and keep
        # track of the largest movement. A cluster that received no points is
        # left untouched.
        biggest_shift = 0.0
        clusters.each_with_index do |cluster, index|
          members = assignments[index]
          next if members.empty?

          biggest_shift = [cluster.update(members), biggest_shift].max
        end

        break if biggest_shift < distance_cutoff
      end

      clusters
    end

    # Picks +number_of_items+ random elements from +array+, never using the
    # same position twice. When more items are requested than the array
    # holds, every element is returned (in random order).
    #
    # This stays publicly callable: the bare `private` below does not apply
    # to methods defined on the class itself.
    def self.pick_random(array, number_of_items)
      array.sample(number_of_items)
    end

    # Replaces the points of this cluster (nil entries are dropped) and
    # recalculates its center.
    #
    # Returns the distance between the new center and the previous one.
    def update(points)
      previous_center = center

      self.points = points.compact
      self.center = calculate_center

      center.distance_to(previous_center)
    end

    private

    # Returns a new Point at the mean latitude and mean longitude of the
    # current points.
    def calculate_center
      count     = points.length
      latitude  = points.inject(0.0) { |sum, point| sum + point.latitude } / count
      longitude = points.inject(0.0) { |sum, point| sum + point.longitude } / count

      Point.new(latitude, longitude)
    end
  end
end
@@ -0,0 +1,27 @@
1
module Clusterable
  # A coordinate pair, stored as a Hash under the :latitude and :longitude
  # keys and exposed through reader methods of the same names.
  class Point < Hash
    # Stores the given coordinates in the hash.
    def initialize(latitude, longitude)
      store(:latitude, latitude)
      store(:longitude, longitude)
    end

    # Returns the value stored under :latitude.
    def latitude
      self[:latitude]
    end

    # Returns the value stored under :longitude.
    def longitude
      self[:longitude]
    end

    # Returns the straight-line (Euclidean) distance between this point and
    # +target+, treating latitude and longitude as plain plane coordinates.
    def distance_to(target)
      latitude_delta  = latitude - target.latitude
      longitude_delta = longitude - target.longitude

      Math.sqrt((latitude_delta ** 2) + (longitude_delta ** 2))
    end

    # Returns the point formatted as "latitude, longitude".
    def to_s
      coordinates = [latitude, longitude]
      coordinates.join(", ")
    end
  end
end
@@ -0,0 +1,74 @@
1
+ require 'spec_helper'
2
+
3
describe Clusterable::Cluster do
  # Builds a point at the given coordinates. Despite the name nothing is
  # random: any omitted coordinate falls back to a fixed default.
  def random_point(lat = nil, lng = nil)
    latitude  = lat || 50.9985319
    longitude = lng || 5.8573735

    Clusterable::Point.new(latitude, longitude)
  end

  describe 'instance methods' do
    describe '#initialize' do
      it 'returns a cluster' do
        cluster = Clusterable::Cluster.new([random_point])

        cluster.should be_an_instance_of(Clusterable::Cluster)
      end

      it 'requires at least 1 point' do
        without_points = lambda { Clusterable::Cluster.new([]) }

        without_points.should raise_error
      end

      it 'sets the points attribute' do
        members = [random_point]

        Clusterable::Cluster.new(members).points.should eql(members)
      end

      it 'accepts an optional center point' do
        with_center = lambda { Clusterable::Cluster.new([random_point], random_point) }

        with_center.should_not raise_error
      end

      it 'sets the center attribute to the specified center point' do
        expected_center = random_point
        cluster         = Clusterable::Cluster.new([random_point], expected_center)

        cluster.center.should eql(expected_center)
      end

      it 'calculates the center point if unspecified' do
        members = [random_point(50, 5), random_point(60, 10)]
        center  = Clusterable::Cluster.new(members).center

        center.should_not be_nil
        center.latitude.should eql(55.0)
        center.longitude.should eql(7.5)
      end
    end

    describe '#update' do
      let(:cluster) { Clusterable::Cluster.new([random_point]) }
      let(:target)  { Clusterable::Point.new(50.8889999, 5.9773202) }

      it 'overwrites the points attribute' do
        replacement = [target]

        cluster.update(replacement)

        cluster.points.should eql(replacement)
      end

      it 'recalculates the center point' do
        cluster.update([random_point(50, 5)])
        new_center = cluster.center

        new_center.latitude.should eql(50.0)
        new_center.longitude.should eql(5.0)
      end

      it 'returns the distance from the old center to the new center' do
        shift = cluster.update([target])

        shift.should eql(0.16243297037513815)
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'spec_helper'
2
+
3
describe Clusterable::Point do
  describe 'instance methods' do
    # Default subject of the examples below; '#initialize' overrides it.
    let(:marker) { Clusterable::Point.new(50.9985319, 5.8573735) }

    describe '#initialize' do
      # Placeholder strings make it obvious which argument ends up where.
      let(:marker) { Clusterable::Point.new('latitude', 'longitude') }

      it 'sets the latitude' do
        stored = marker[:latitude]

        stored.should eql('latitude')
      end

      it 'sets the longitude' do
        stored = marker[:longitude]

        stored.should eql('longitude')
      end
    end

    describe '#latitude' do
      it 'returns the latitude' do
        expected = marker[:latitude]

        marker.latitude.should eql(expected)
      end
    end

    describe '#longitude' do
      it 'returns the longitude' do
        expected = marker[:longitude]

        marker.longitude.should eql(expected)
      end
    end

    describe '#distance_to' do
      let(:target) { Clusterable::Point.new(50.8889999, 5.9773202) }

      it 'returns the distance to another point' do
        distance_to_self   = marker.distance_to(marker)
        distance_to_target = marker.distance_to(target)

        distance_to_self.should eql(0.0)
        distance_to_target.should eql(0.16243297037513815)
      end
    end

    describe '#to_s' do
      it 'returns the point as a string' do
        formatted = marker.to_s

        formatted.should eql('50.9985319, 5.8573735')
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
describe Clusterable do
  describe 'VERSION' do
    it "is a valid semantic version" do
      # \A and \z anchor the whole string. ^ and $ only anchor a single line,
      # so a multi-line value containing one valid line would slip through.
      Clusterable::VERSION.should match(/\A[0-9]+\.[0-9]+\.[0-9]+([a-zA-Z]([0-9A-Za-z-]+)?)?\z/)
    end
  end
end
@@ -0,0 +1,9 @@
1
# Make the spec directory and the gem's lib directory requirable. The lib
# directory ends up first on the load path, followed by the spec directory.
spec_dir = File.dirname(__FILE__)
lib_dir  = File.join(File.dirname(__FILE__), '..', 'lib')
$LOAD_PATH.unshift(lib_dir, spec_dir)

require 'rubygems'
require 'clusterable'
require 'rspec'

# No custom RSpec configuration is needed.
RSpec.configure { |config| }
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusterable
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Tom-Eric Gerritsen
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-27 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Create clusters of points.
15
+ email:
16
+ - tomeric@eet.nu
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - benchmark/benchmark.rb
22
+ - benchmark/points.csv
23
+ - lib/clusterable/cluster.rb
24
+ - lib/clusterable/point.rb
25
+ - lib/clusterable.rb
26
+ - spec/lib/clusterable/cluster_spec.rb
27
+ - spec/lib/clusterable/point_spec.rb
28
+ - spec/lib/clusterable_spec.rb
29
+ - spec/spec_helper.rb
30
+ - LICENSE
31
+ - Rakefile
32
+ - Gemfile
33
+ - README.md
34
+ homepage: http://github.com/eet-nu/clusterable
35
+ licenses: []
36
+ post_install_message:
37
+ rdoc_options: []
38
+ require_paths:
39
+ - lib
40
+ required_ruby_version: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ segments:
47
+ - 0
48
+ hash: -2152434804067564302
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ segments:
56
+ - 0
57
+ hash: -2152434804067564302
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.10
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: Create clusters of points.
64
+ test_files: []