clusto 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Conor Mulligan, Solas.ca
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,16 @@
1
+ Clusto
2
+ ======
3
+
4
+ *Simple Ruby point clustering*
5
+
6
+ If you're familiar with the [k-means][1] algorithm then Clusto's clustering process will probably be broadly recognisable, although it does take some liberties in the name of performance.
7
+
8
+ Usage
9
+ -----
10
+
11
+ ```ruby
12
+ points = data.map { |p| Clusto::Point.new(p.x, p.y) }
13
+ clusters = Clusto.cluster(points)
14
+ ```
15
+
16
+ [1]: http://en.wikipedia.org/wiki/K-means_clustering
data/clusto.gemspec ADDED
@@ -0,0 +1,9 @@
1
# Gem specification for clusto.
Gem::Specification.new do |s|
  s.name    = 'clusto'
  s.version = '0.1.0'
  s.author  = 'Conor Mulligan'
  s.email   = 'conmulligan@gmail.com'
  s.summary = 'Simple Ruby point clustering'
  # The bundled LICENSE file carries the MIT licence text.
  s.license = 'MIT'

  # List only the files that belong in the package, relative to the gemspec's
  # own directory. Globbing everything under that directory would also pick up
  # previously built .gem archives and yields "./"-prefixed or absolute paths.
  s.files = Dir.chdir(File.dirname(__FILE__)) do
    Dir['lib/**/*.rb'] + Dir['LICENSE', 'README.md', 'clusto.gemspec']
  end
end
data/lib/clusto.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'clusto/point'
2
+ require 'clusto/cluster'
3
+ require 'clusto/distance'
4
+
5
# Grid-seeded point clustering.
#
# Points are anything responding to +x+ and +y+. Clustering seeds a fixed
# GRID_SIZE x GRID_SIZE grid of clusters over a bounding rectangle, assigns
# every point to its nearest cluster, then moves each cluster via
# Cluster#reposition.
module Clusto
  extend self

  INFINITY = +1.0/0
  # Multiplier converting degrees to radians.
  RADIANS_PER_DEGREES = Math::PI / 180
  # Sphere radius used by the haversine distance
  # (presumably Earth's mean radius in kilometres -- confirm).
  GREAT_CIRCLE_RADIUS = 6371

  # Number of grid cells along each axis; GRID_SIZE ** 2 clusters are seeded.
  GRID_SIZE = 8

  # Computes the axis-aligned bounding box of a collection of points.
  #
  # @param points [Enumerable] objects responding to +x+ and +y+
  # @return [Array] +[min_x, min_y, max_x, max_y]+; all four entries are nil
  #   when +points+ is empty
  def minimum_bounding_rectangle(points)
    min_x, max_x = points.map { |point| point.x }.minmax
    min_y, max_y = points.map { |point| point.y }.minmax

    [min_x, min_y, max_x, max_y]
  end

  # Clusters +points+ on a GRID_SIZE x GRID_SIZE grid.
  #
  # @param points [Enumerable] objects responding to +x+ and +y+
  # @param options [Hash]
  #   :bounds          -- +[min_x, min_y, max_x, max_y]+; defaults to the
  #                       minimum bounding rectangle of +points+
  #   :distance_method -- name of a public Clusto::Distance class method;
  #                       defaults to +:haversine+
  # @return [Array<Clusto::Cluster>] every seeded cluster (including those
  #   with no points), or +[]+ when there are no points and no explicit bounds
  # @raise [RuntimeError] when the distance method is not known
  def cluster(points, options={})
    distance_method = options[:distance_method] || :haversine

    # Validate the name up front instead of rescuing NoMethodError around the
    # call: a rescue would also swallow unrelated NoMethodErrors raised inside
    # the distance function and report them as an unknown method.
    unless Clusto::Distance.respond_to?(distance_method)
      raise "Unknown distance method '#{distance_method}'."
    end

    bounds = options[:bounds]
    unless bounds
      bounds = Clusto.minimum_bounding_rectangle(points)
      # No points means no extent to lay a grid over.
      return [] if bounds.include?(nil)
    end

    clusters = seed_grid_clusters(bounds)
    assign_to_nearest_cluster(points, clusters, distance_method)
    clusters.each { |seeded| seeded.reposition }

    clusters
  end

  private

  # Builds one cluster at the centre of every grid cell, row by row starting
  # from the top (max y) edge and moving left to right within a row.
  def seed_grid_clusters(bounds)
    left = bounds[0]
    top  = bounds[3]
    # Float division: integer bounds must not truncate the cell size.
    cell_w = (bounds[2] - bounds[0]).to_f / GRID_SIZE
    cell_h = (bounds[3] - bounds[1]).to_f / GRID_SIZE

    (0...GRID_SIZE).flat_map do |row|
      (0...GRID_SIZE).map do |column|
        cell_x = left + (column * cell_w)
        cell_y = top - (row * cell_h)

        center = Clusto::Point.new(cell_x + (cell_w / 2), cell_y - (cell_h / 2))
        Clusto::Cluster.new(center)
      end
    end
  end

  # Appends each point to the +points+ list of the cluster whose location is
  # closest under +distance_method+; the earliest cluster wins a tie.
  def assign_to_nearest_cluster(points, clusters, distance_method)
    points.each do |point|
      nearest = clusters.min_by do |candidate|
        Clusto::Distance.public_send(distance_method, candidate.location, point)
      end

      nearest.points.push(point)
    end
  end
end
@@ -0,0 +1,28 @@
1
module Clusto
  # A cluster: a representative location plus the points assigned to it.
  class Cluster
    attr_accessor :location, :points

    # @param location [Clusto::Point] initial position of the cluster
    def initialize(location)
      @location = location
      @points = []
    end

    # Moves the cluster to the arithmetic mean of its points' coordinates.
    # The location is left untouched when no points are assigned.
    #
    # @return [Clusto::Point, nil] the new location, or nil when there are no
    #   points
    def reposition
      return if @points.empty?

      # Divide by a Float so integer coordinates are not averaged with
      # truncating integer division.
      count = @points.length.to_f
      sum_x = @points.inject(0) { |sum, point| sum + point.x }
      sum_y = @points.inject(0) { |sum, point| sum + point.y }

      @location = Clusto::Point.new(sum_x / count, sum_y / count)
    end
  end
end
@@ -0,0 +1,19 @@
1
module Clusto
  # Distance functions between two point-like objects responding to +x+ and
  # +y+.
  class Distance
    # Straight-line distance in the x/y plane.
    #
    # @param pa [#x, #y]
    # @param pb [#x, #y]
    # @return [Float]
    def self.euclidian(pa, pb)
      Math.sqrt((pa.x - pb.x) ** 2 + (pa.y - pb.y) ** 2)
    end

    # Great-circle distance using the haversine formula on a sphere of radius
    # GREAT_CIRCLE_RADIUS. Coordinates are scaled by RADIANS_PER_DEGREES, so
    # they are treated as degrees. +x+ feeds the cosine terms, i.e. it plays
    # the role of latitude in the formula and +y+ that of longitude.
    #
    # @param pa [#x, #y]
    # @param pb [#x, #y]
    # @return [Float] distance in the units of GREAT_CIRCLE_RADIUS
    def self.haversine(pa, pb)
      delta_x = (pb.x - pa.x) * RADIANS_PER_DEGREES
      delta_y = (pb.y - pa.y) * RADIANS_PER_DEGREES

      ax = pa.x * RADIANS_PER_DEGREES
      bx = pb.x * RADIANS_PER_DEGREES

      a = Math.sin(delta_x / 2) ** 2 +
          Math.cos(ax) * Math.cos(bx) * Math.sin(delta_y / 2) ** 2

      # Keep +a+ within [0, 1]: float rounding near antipodal points (or
      # out-of-range x values making the cosine product negative) would
      # otherwise hand Math.sqrt a negative argument and raise
      # Math::DomainError. NaN is deliberately left as-is.
      a = 1.0 if a > 1.0
      a = 0.0 if a < 0.0

      c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a))
      GREAT_CIRCLE_RADIUS * c
    end

    class << self
      # Conventional spelling; the original +euclidian+ name is kept for
      # existing callers.
      alias_method :euclidean, :euclidian
    end
  end
end
@@ -0,0 +1,15 @@
1
module Clusto
  # A two-dimensional point with an optional caller-defined tag.
  class Point
    attr_accessor :x, :y, :tag

    # @param x [Numeric] first coordinate
    # @param y [Numeric] second coordinate
    # @param tag [Object] arbitrary value carried with the point; defaults to 0
    def initialize(x, y, tag=0)
      @x = x
      @y = y
      @tag = tag
    end

    # @return [String] the coordinates formatted as "(x, y)"
    def to_s
      "(#{@x}, #{@y})"
    end
  end
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clusto
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Conor Mulligan
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-13 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description:
15
+ email: conmulligan@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - ./clusto.gemspec
21
+ - ./lib/clusto/cluster.rb
22
+ - ./lib/clusto/distance.rb
23
+ - ./lib/clusto/point.rb
24
+ - ./lib/clusto.rb
25
+ - ./LICENSE
26
+ - ./README.md
27
+ homepage:
28
+ licenses: []
29
+ post_install_message:
30
+ rdoc_options: []
31
+ require_paths:
32
+ - lib
33
+ required_ruby_version: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ! '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 1.8.15
48
+ signing_key:
49
+ specification_version: 3
50
+ summary: Simple Ruby point clustering
51
+ test_files: []