clusterable 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +6 -0
- data/LICENSE +20 -0
- data/README.md +2 -0
- data/Rakefile +2 -0
- data/benchmark/benchmark.rb +25 -0
- data/benchmark/points.csv +26077 -0
- data/lib/clusterable.rb +7 -0
- data/lib/clusterable/cluster.rb +85 -0
- data/lib/clusterable/point.rb +27 -0
- data/spec/lib/clusterable/cluster_spec.rb +74 -0
- data/spec/lib/clusterable/point_spec.rb +46 -0
- data/spec/lib/clusterable_spec.rb +9 -0
- data/spec/spec_helper.rb +9 -0
- metadata +64 -0
data/lib/clusterable.rb
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
module Clusterable
  # A group of points together with the point that represents their center.
  #
  # Points only need to respond to #latitude, #longitude and #distance_to.
  class Cluster
    attr_accessor :points, :center

    # Builds a cluster from a non-empty list of points.
    #
    # points - Array of point-like objects.
    # center - Optional center point. When nil, the center is computed as the
    #          arithmetic mean of the points' latitudes and longitudes.
    #
    # Raises RuntimeError when points is empty.
    def initialize(points = [], center = nil)
      raise "can't initialize an empty cluster" if points.empty?

      self.points = points
      self.center = center || calculate_center
    end

    # Groups points into clusters using k-means.
    # See: http://en.wikipedia.org/wiki/K-means_clustering
    #
    # points          - Array of point-like objects to cluster.
    # sample_size     - Number of clusters to seed; each seed is a randomly
    #                   picked point that becomes the initial center.
    # distance_cutoff - Iteration stops once no center moved by at least this
    #                   distance during a pass.
    #
    # Returns an Array of clusters. A cluster that attracts no points during a
    # pass is left untouched, so it keeps the points and center it had before.
    #
    # Raises ArgumentError when no initial cluster can be formed (no points,
    # or a sample size of zero).
    def self.kmeans(points, sample_size, distance_cutoff)
      clusters = pick_random(points, sample_size).map { |point| new([point], point) }

      if clusters.empty?
        raise ArgumentError, "kmeans needs at least one point and a positive sample size"
      end

      loop do
        # One bucket per cluster, in the same order as `clusters`.
        lists = clusters.map { [] }

        points.each do |point|
          # min_by keeps the first minimum, so ties go to the earliest cluster.
          nearest = (0...clusters.length).min_by do |index|
            point.distance_to(clusters[index].center)
          end

          lists[nearest] << point
        end

        biggest_shift = 0.0
        clusters.each_with_index do |cluster, index|
          next if lists[index].empty?

          shift = cluster.update(lists[index])
          biggest_shift = [shift, biggest_shift].max
        end

        break if biggest_shift < distance_cutoff
      end

      clusters
    end

    # Picks distinct random elements (by position) from array.
    #
    # array           - Array to pick from.
    # number_of_items - How many elements to pick. When this exceeds the size
    #                   of the array, every element is returned once.
    #
    # Returns an Array of the picked elements in random order.
    #
    # NOTE: this is a singleton method, so a bare `private` does not hide it;
    # it stays publicly callable.
    def self.pick_random(array, number_of_items)
      array.sample(number_of_items)
    end

    # Replaces the cluster's points and recomputes its center.
    #
    # points - Array of point-like objects; nil entries are dropped.
    #
    # Returns the distance between the new center and the previous one.
    def update(points)
      previous_center = center

      self.points = points.compact
      self.center = calculate_center

      center.distance_to(previous_center)
    end

    private

    # Returns a Point at the mean latitude and mean longitude of the points.
    def calculate_center
      count     = points.length
      latitude  = points.inject(0.0) { |sum, point| sum + point.latitude } / count
      longitude = points.inject(0.0) { |sum, point| sum + point.longitude } / count

      Point.new(latitude, longitude)
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Clusterable
  # A coordinate pair kept in a Hash under the :latitude and :longitude keys.
  class Point < Hash
    # Stores the given coordinates as-is (no conversion is applied).
    def initialize(latitude, longitude)
      update(:latitude => latitude, :longitude => longitude)
    end

    # Returns the value stored under :latitude.
    def latitude
      self[:latitude]
    end

    # Returns the value stored under :longitude.
    def longitude
      self[:longitude]
    end

    # Returns the straight-line (Euclidean) distance between this point and
    # target, computed directly on the latitude/longitude values.
    #
    # target - Any object responding to #latitude and #longitude.
    def distance_to(target)
      latitude_delta  = latitude - target.latitude
      longitude_delta = longitude - target.longitude

      Math.sqrt((latitude_delta ** 2) + (longitude_delta ** 2))
    end

    # Returns the coordinates formatted as "latitude, longitude".
    def to_s
      values_at(:latitude, :longitude).join(", ")
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
require 'spec_helper'

# Specs for Clusterable::Cluster: construction and #update.
describe Clusterable::Cluster do
  # Builds a Point for use in the examples. Despite the name, nothing here is
  # random: without arguments it always returns the same fixed coordinate.
  def random_point(lat = nil, lng = nil)
    Clusterable::Point.new(lat || 50.9985319,
                           lng || 5.8573735)
  end

  describe 'instance methods' do
    describe '#initialize' do
      it 'returns a cluster' do
        Clusterable::Cluster.new([random_point]).should be_an_instance_of(Clusterable::Cluster)
      end

      it 'requires at least 1 point' do
        lambda {
          Clusterable::Cluster.new([])
        }.should raise_error
      end

      it 'sets the points attribute' do
        points = [random_point]
        cluster = Clusterable::Cluster.new(points)

        cluster.points.should eql(points)
      end

      it 'accepts an optional center point' do
        lambda {
          Clusterable::Cluster.new([random_point], random_point)
        }.should_not raise_error
      end

      it 'sets the center attribute to the specified center point' do
        center = random_point
        cluster = Clusterable::Cluster.new([random_point], center)
        cluster.center.should eql(center)
      end

      it 'calculates the center point if unspecified' do
        # The expected center is the mean of the two points' coordinates.
        points = [random_point(50, 5), random_point(60, 10)]
        cluster = Clusterable::Cluster.new(points)

        cluster.center.should_not be_nil
        cluster.center.latitude.should eql(55.0)
        cluster.center.longitude.should eql(7.5)
      end
    end

    describe '#update' do
      # The cluster starts out holding only the default random_point.
      let(:cluster) { Clusterable::Cluster.new([random_point]) }
      let(:target)  { Clusterable::Point.new(50.8889999, 5.9773202) }

      it 'overwrites the points attribute' do
        points = [target]
        cluster.update(points)

        cluster.points.should eql(points)
      end

      it 'recalculates the center point' do
        points = [random_point(50, 5)]
        cluster.update(points)

        cluster.center.latitude.should eql(50.0)
        cluster.center.longitude.should eql(5.0)
      end

      it 'returns the distance from the old center to the new center' do
        # Old center is the default random_point, new center is target.
        cluster.update([target]).should eql(0.16243297037513815)
      end
    end
  end
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
require 'spec_helper'

# Specs for Clusterable::Point: hash storage, readers, distance and formatting.
describe Clusterable::Point do
  describe 'instance methods' do
    let(:marker) { Clusterable::Point.new(50.9985319, 5.8573735) }

    describe '#initialize' do
      # Overrides the outer marker with placeholder strings to check that the
      # constructor arguments are stored under the expected keys.
      let(:marker) { Clusterable::Point.new('latitude', 'longitude') }

      it 'sets the latitude' do
        marker[:latitude].should eql('latitude')
      end

      it 'sets the longitude' do
        marker[:longitude].should eql('longitude')
      end
    end

    describe '#latitude' do
      it 'returns the latitude' do
        marker.latitude.should eql(marker[:latitude])
      end
    end

    describe '#longitude' do
      it 'returns the longitude' do
        marker.longitude.should eql(marker[:longitude])
      end
    end

    describe '#distance_to' do
      let(:target) { Clusterable::Point.new(50.8889999, 5.9773202) }

      it 'returns the distance to another point' do
        # A point is at distance zero from itself.
        marker.distance_to(marker).should eql(0.0)
        marker.distance_to(target).should eql(0.16243297037513815)
      end
    end

    describe '#to_s' do
      it 'returns the point as a string' do
        marker.to_s.should eql('50.9985319, 5.8573735')
      end
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: clusterable
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
prerelease:
|
|
6
|
+
platform: ruby
|
|
7
|
+
authors:
|
|
8
|
+
- Tom-Eric Gerritsen
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: bin
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2011-12-27 00:00:00.000000000 Z
|
|
13
|
+
dependencies: []
|
|
14
|
+
description: Create clusters of points.
|
|
15
|
+
email:
|
|
16
|
+
- tomeric@eet.nu
|
|
17
|
+
executables: []
|
|
18
|
+
extensions: []
|
|
19
|
+
extra_rdoc_files: []
|
|
20
|
+
files:
|
|
21
|
+
- benchmark/benchmark.rb
|
|
22
|
+
- benchmark/points.csv
|
|
23
|
+
- lib/clusterable/cluster.rb
|
|
24
|
+
- lib/clusterable/point.rb
|
|
25
|
+
- lib/clusterable.rb
|
|
26
|
+
- spec/lib/clusterable/cluster_spec.rb
|
|
27
|
+
- spec/lib/clusterable/point_spec.rb
|
|
28
|
+
- spec/lib/clusterable_spec.rb
|
|
29
|
+
- spec/spec_helper.rb
|
|
30
|
+
- LICENSE
|
|
31
|
+
- Rakefile
|
|
32
|
+
- Gemfile
|
|
33
|
+
- README.md
|
|
34
|
+
homepage: http://github.com/eet-nu/clusterable
|
|
35
|
+
licenses: []
|
|
36
|
+
post_install_message:
|
|
37
|
+
rdoc_options: []
|
|
38
|
+
require_paths:
|
|
39
|
+
- lib
|
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
41
|
+
none: false
|
|
42
|
+
requirements:
|
|
43
|
+
- - ! '>='
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: '0'
|
|
46
|
+
segments:
|
|
47
|
+
- 0
|
|
48
|
+
hash: -2152434804067564302
|
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
|
+
none: false
|
|
51
|
+
requirements:
|
|
52
|
+
- - ! '>='
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '0'
|
|
55
|
+
segments:
|
|
56
|
+
- 0
|
|
57
|
+
hash: -2152434804067564302
|
|
58
|
+
requirements: []
|
|
59
|
+
rubyforge_project:
|
|
60
|
+
rubygems_version: 1.8.10
|
|
61
|
+
signing_key:
|
|
62
|
+
specification_version: 3
|
|
63
|
+
summary: Create clusters of points.
|
|
64
|
+
test_files: []
|