cluda 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
module Cluda
  # Chebyshev distance between two 2-D points: the larger of the absolute
  # differences along the :x and :y axes.
  class Chebyshev < Distance
    # @param x0 [Hash] first point, with numeric :x and :y entries
    # @param x  [Hash] second point, with numeric :x and :y entries
    # @return [Numeric] the greatest per-axis absolute difference
    # @raise [Cluda::InvalidPoint] when either argument fails Distance.validate
    def self.distance(x0, x)
      validate([x0, x])

      axis_gaps = [:x, :y].map { |axis| (x0[axis] - x[axis]).abs }
      axis_gaps.max
    end
  end
end
@@ -0,0 +1,24 @@
1
module Cluda
  # Raised when a value is not a Hash holding numeric :x and :y entries.
  class InvalidPoint < RuntimeError; end

  # Base class for the 2-D distance metrics. Subclasses override
  # Distance.distance; Distance.validate is the shared input check.
  class Distance
    # Makes Math's functions (e.g. sqrt) callable without a receiver from the
    # class-level methods of this class and of its subclasses.
    extend Math

    # Computes the distance between two points. Must be overridden.
    #
    # @param x0 [Hash] first point
    # @param x  [Hash] second point
    # @raise [NotImplementedError] always, in this base class
    def self.distance(x0, x)
      raise ::NotImplementedError, "You must implement distance method"
    end

    # Checks that every point is a Hash with numeric :x and :y entries.
    #
    # This is intentionally a public class method: a bare `protected` keyword
    # has no effect on `def self.` methods, and Cluda::Kmeans calls validate
    # from outside this class hierarchy.
    #
    # @param data [Hash, Array<Hash>] one point or a list of points
    # @return [Array<Hash>] the points, always wrapped in an Array
    # @raise [Cluda::InvalidPoint] for the first malformed point found
    def self.validate(data)
      points = data.is_a?(Array) ? data : [data]

      points.each do |point|
        well_formed = point.is_a?(Hash) &&
                      point.include?(:x) && point.include?(:y) &&
                      point[:x].is_a?(Numeric) && point[:y].is_a?(Numeric)
        next if well_formed

        # Include the offending value so callers can tell which input was bad.
        raise InvalidPoint, "expected a Hash with numeric :x and :y, got #{point.inspect}"
      end

      points
    end
  end
end
@@ -0,0 +1,10 @@
1
module Cluda
  # Euclidean (straight-line) distance between two 2-D points.
  class Euclidean < Distance
    # @param x0 [Hash] first point, with numeric :x and :y entries
    # @param x  [Hash] second point, with numeric :x and :y entries
    # @return [Float] square root of the summed squared per-axis differences
    # @raise [Cluda::InvalidPoint] when either argument fails Distance.validate
    def self.distance(x0, x)
      validate([x0, x])

      delta_x = x0[:x] - x[:x]
      delta_y = x0[:y] - x[:y]

      # sqrt comes from Math, which Distance extends.
      sqrt(delta_x ** 2 + delta_y ** 2)
    end
  end
end
@@ -0,0 +1,10 @@
1
module Cluda
  # Manhattan (taxicab) distance between two 2-D points: the sum of the
  # absolute differences along the :x and :y axes.
  class Manhattan < Distance
    # @param x0 [Hash] first point, with numeric :x and :y entries
    # @param x  [Hash] second point, with numeric :x and :y entries
    # @return [Numeric] sum of the per-axis absolute differences
    # @raise [Cluda::InvalidPoint] when either argument fails Distance.validate
    def self.distance(x0, x)
      validate([x0, x])

      horizontal = (x0[:x] - x[:x]).abs
      vertical = (x0[:y] - x[:y]).abs
      horizontal + vertical
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'cluda/distances/manhattan'
2
+ require 'cluda/distances/euclidean'
3
+ require 'cluda/distances/chebyshev'
4
+
5
module Cluda
  # Raised when the requested distance metric is not one of the supported names.
  class InvalidDistanceMethod < RuntimeError; end

  # Iterative clustering of 2-D points (Hashes with numeric :x and :y).
  # Centroids start as randomly chosen input points and are then moved to the
  # per-axis median of the points assigned to them until they stop changing or
  # the iteration budget is used up.
  #
  # NOTE: every method here is a class-level (`def self.`) method. Bare
  # `private` / `protected` keywords do not apply to such methods, so all of
  # them have always been publicly callable; they are kept public so existing
  # callers keep working.
  class Kmeans
    # Metric names accepted by classify (compared case-insensitively).
    DISTANCE_NAMES = %w[euclidean chebyshev manhattan].freeze

    # Groups the points of +list+ into at most +k+ clusters.
    #
    # @param list [Array<Hash>] points, each with numeric :x and :y
    # @param k [Integer] number of centroids to start from
    # @param class_name [String, Symbol] 'euclidean', 'chebyshev' or 'manhattan'
    # @param max_iterations [Integer] upper bound on assign/move rounds
    # @return [Hash{Hash => Array<Hash>}] centroid => points assigned to it;
    #   empty when no centroids can be chosen (empty list, k < 1, k > list.size)
    # @raise [Cluda::InvalidDistanceMethod] for an unsupported metric name
    # @raise [Cluda::InvalidPoint] when the metric's validate rejects the list
    def self.classify(list, k, class_name = 'euclidean', max_iterations = 50)
      raise InvalidDistanceMethod unless valid_class?(class_name)

      _class = Cluda.const_get(class_name.to_s.downcase.capitalize)
      _class.validate(list)

      centroids = initialize_centroids(list, k, _class)
      # Seed the result up front so the method never returns nil, even when
      # the loop below does not run at all.
      output = init_output(centroids)
      # Without centroids there is nothing to assign points to.
      return output if centroids.empty?

      previous_centroids = nil
      iterations = 0

      # Counting from zero makes max_iterations the real upper bound.
      while iterations < max_iterations && previous_centroids != centroids
        output = init_output(centroids)

        list.each do |point|
          output[nearest_centroid(point, centroids, _class)] << point
        end

        iterations += 1
        previous_centroids = centroids
        centroids = move_centroids(output)
      end

      output
    end

    # Finds the centroid closest to +point+ under the given metric.
    #
    # @param point [Hash] point with numeric :x and :y
    # @param centroids [Array<Hash>] candidate centroids
    # @param _class [Class] metric class responding to validate and distance
    # @return [Hash, nil] the first centroid at minimal distance, or nil when
    #   +centroids+ is empty
    def self.nearest_centroid(point, centroids, _class = Cluda::Euclidean)
      return nil if centroids.empty?

      _class.validate(point)

      centroids.min_by { |centroid| _class.distance(point, centroid) }
    end

    # Picks +k+ distinct elements of +list+ at random as starting centroids.
    #
    # @param list [Array<Hash>] points to choose from
    # @param k [Integer] how many centroids to pick
    # @param _class [Class] metric class used to validate the list
    # @return [Array<Hash>] k points, or [] when list is empty, k < 1 or
    #   k > list.size
    def self.initialize_centroids(list, k, _class = Cluda::Euclidean)
      _class.validate(list)

      return [] if list.empty? || k < 1 || k > list.size

      # Array#sample(k) draws k elements at distinct random indices; the old
      # shuffle(random: <Integer>) passed a number where a generator is needed.
      list.sample(k)
    end

    # @param name [String, Symbol, nil] candidate metric name
    # @return [Boolean] whether +name+ is a supported metric
    def self.valid_class?(name)
      DISTANCE_NAMES.include?(name.to_s.downcase)
    end

    # Builds the result skeleton: each centroid mapped to its own empty list.
    #
    # @param centroids [Array<Hash>]
    # @return [Hash{Hash => Array}]
    def self.init_output(centroids)
      centroids.each_with_object({}) do |centroid, memo|
        memo[centroid] = []
      end
    end

    # Middle element of the sorted list; for an even count this is the upper
    # of the two middle values. Returns nil for an empty list.
    #
    # @param list [Array<Numeric>]
    # @return [Numeric, nil]
    def self.median(list)
      list.sort[list.size / 2]
    end

    # @param points [Array<Hash>]
    # @param key [Symbol] coordinate to extract (:x or :y)
    # @return [Array] the +key+ entry of every point, in order
    def self.get_key_values(points, key)
      points.map { |point| point[key] }
    end

    # Computes the next centroid of every cluster as the per-axis median of
    # its points.
    #
    # @param output [Hash{Hash => Array<Hash>}] centroid => assigned points
    # @return [Array<Hash>] one centroid per cluster, in the Hash's order
    def self.move_centroids(output)
      output.map do |centroid, points|
        # A cluster with no points has no median; keep its centroid where it
        # is instead of producing { x: nil, y: nil }.
        next centroid if points.empty?

        {
          x: median(get_key_values(points, :x)),
          y: median(get_key_values(points, :y))
        }
      end
    end
  end
end
data/lib/cluda.rb ADDED
@@ -0,0 +1,4 @@
1
+ $:.unshift File.expand_path('..', __FILE__ )
2
+
3
+ require 'cluda/distances/distance'
4
+ require 'cluda/kmeans'
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cluda
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Enrique Figuerola
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-29 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.11.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.11.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: CLustering Data Analysis gem
47
+ email: hard_rock15@msn.com
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/cluda.rb
53
+ - lib/cluda/kmeans.rb
54
+ - lib/cluda/distances/distance.rb
55
+ - lib/cluda/distances/euclidean.rb
56
+ - lib/cluda/distances/manhattan.rb
57
+ - lib/cluda/distances/chebyshev.rb
58
+ homepage: https://github.com/emfigo/cluda
59
+ licenses:
60
+ - MIT
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 1.8.23
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: CLuDA
83
+ test_files: []