cluda 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
module Cluda
  # Chebyshev distance: the largest absolute per-axis difference between
  # two points.
  class Chebyshev < Distance
    # Measures the Chebyshev distance between two points.
    #
    # Both points are first passed to the inherited +validate+.
    #
    # @param x0 [Hash] first point, read through its :x and :y entries
    # @param x [Hash] second point, read through its :x and :y entries
    # @return [Numeric] the greater of |dx| and |dy|
    def self.distance(x0, x)
      validate([x0, x])

      # Absolute gap on each axis, x first and then y.
      gaps = [:x, :y].map { |axis| (x0[axis] - x[axis]).abs }
      gaps.max
    end
  end
end
@@ -0,0 +1,24 @@
1
module Cluda
  # Raised when a point is not a Hash holding Numeric :x and :y entries.
  class InvalidPoint < RuntimeError; end

  # Base class for the distance metrics. Subclasses override {.distance};
  # {.validate} is shared by all of them.
  class Distance
    # Makes the Math functions (e.g. +sqrt+) callable without a receiver
    # from the class methods of this class and its subclasses.
    extend Math

    # Computes the distance between two points. Must be overridden.
    #
    # @param x0 [Hash] first point
    # @param x [Hash] second point
    # @raise [NotImplementedError] always, in this base class
    def self.distance(x0, x)
      raise ::NotImplementedError, "You must implement distance method"
    end

    # Checks that every given point is a Hash that has :x and :y keys whose
    # values are Numeric.
    #
    # This method is public on purpose: a bare `protected` keyword does not
    # apply to methods defined with `def self.`, and code outside this class
    # hierarchy calls it directly.
    #
    # @param data [Hash, Array<Hash>] a single point or a list of points
    # @return [Array] the checked points, always wrapped in an Array
    # @raise [InvalidPoint] if any point fails the check
    def self.validate(data)
      points = data.is_a?(Array) ? data : [data]

      points.each do |point|
        # key? is checked explicitly so a Hash with a numeric default value
        # does not pass for a point that never set :x / :y.
        next if point.is_a?(Hash) &&
                point.key?(:x) && point.key?(:y) &&
                point[:x].is_a?(Numeric) && point[:y].is_a?(Numeric)

        raise InvalidPoint, "expected a Hash with Numeric :x and :y, got #{point.inspect}"
      end

      points
    end
  end
end
@@ -0,0 +1,10 @@
1
module Cluda
  # Euclidean (straight-line) distance between two points.
  class Euclidean < Distance
    # Measures the Euclidean distance between two points.
    #
    # Both points are first passed to the inherited +validate+.
    #
    # @param x0 [Hash] first point, read through its :x and :y entries
    # @param x [Hash] second point, read through its :x and :y entries
    # @return [Float] sqrt(dx**2 + dy**2)
    def self.distance(x0, x)
      validate([x0, x])

      # Math.hypot computes sqrt(dx**2 + dy**2) without squaring the
      # differences in Ruby first, so very large coordinates do not
      # overflow to Infinity on the way to the result.
      Math.hypot(x0[:x] - x[:x], x0[:y] - x[:y])
    end
  end
end
@@ -0,0 +1,10 @@
1
module Cluda
  # Manhattan (taxicab) distance: the sum of the absolute per-axis
  # differences between two points.
  class Manhattan < Distance
    # Measures the Manhattan distance between two points.
    #
    # Both points are first passed to the inherited +validate+.
    #
    # @param x0 [Hash] first point, read through its :x and :y entries
    # @param x [Hash] second point, read through its :x and :y entries
    # @return [Numeric] |dx| + |dy|
    def self.distance(x0, x)
      validate([x0, x])

      horizontal = (x0[:x] - x[:x]).abs
      vertical = (x0[:y] - x[:y]).abs
      horizontal + vertical
    end
  end
end
@@ -0,0 +1,95 @@
1
+ require 'cluda/distances/manhattan'
2
+ require 'cluda/distances/euclidean'
3
+ require 'cluda/distances/chebyshev'
4
+
5
module Cluda
  # Raised when the requested distance method is not a supported one.
  class InvalidDistanceMethod < RuntimeError; end

  # Iterative clustering of points given as Hashes with :x and :y entries.
  # Centroids are re-positioned on the per-axis median of their cluster.
  #
  # Every method here is public: the helpers are defined with `def self.`,
  # which bare `private` / `protected` keywords do not affect, so they have
  # always been callable from outside. Only {.classify} is meant as the
  # entry point.
  class Kmeans
    # Names accepted (case-insensitively) as the distance method.
    DISTANCE_METHODS = %w[euclidean chebyshev manhattan].freeze

    # Groups the points of +list+ into at most +k+ clusters.
    #
    # @param list [Array<Hash>] points to cluster
    # @param k [Integer] number of clusters requested
    # @param class_name [String, Symbol] distance method name
    # @param max_iterations [Integer] upper bound on assignment passes
    # @return [Hash{Hash => Array<Hash>}, nil] centroid => points assigned to
    #   it; an empty Hash when no centroid can be chosen (empty list or
    #   k larger than the list); nil when max_iterations is below 1
    # @raise [InvalidDistanceMethod] if class_name is not supported
    def self.classify(list, k, class_name = 'euclidean', max_iterations = 50)
      unless valid_class?(class_name)
        raise InvalidDistanceMethod, "unsupported distance method: #{class_name.inspect}"
      end

      # Safe to resolve dynamically: the name was checked against the
      # fixed list of supported methods just above.
      _class = Cluda.const_get(class_name.to_s.downcase.capitalize)
      _class.validate(list)

      centroids = initialize_centroids(list, k, _class)
      # Without centroids there is nothing to assign points to.
      return {} if centroids.empty?

      output = nil
      max_iterations.times do
        output = init_output(centroids)
        list.each do |point|
          output[nearest_centroid(point, centroids, _class)] << point
        end

        previous_centroids = centroids
        centroids = move_centroids(output)
        # Stop as soon as a pass leaves every centroid where it was.
        break if centroids == previous_centroids
      end

      output
    end

    # Finds the centroid closest to +point+. On equal distances the
    # centroid that comes first in +centroids+ wins.
    #
    # @param point [Hash] the point to place
    # @param centroids [Array<Hash>] candidate centroids
    # @param _class [Class] distance class providing validate and distance
    # @return [Hash, nil] the nearest centroid, or nil without candidates
    # @api private
    def self.nearest_centroid(point, centroids, _class = Cluda::Euclidean)
      return nil if centroids.empty?

      _class.validate(point)

      centroids.min_by { |centroid| _class.distance(point, centroid) }
    end

    # Picks +k+ random elements of +list+ as the starting centroids.
    #
    # @param list [Array<Hash>] points to choose from
    # @param k [Integer] number of centroids wanted
    # @param _class [Class] distance class used to validate the list
    # @return [Array<Hash>] k elements of list in random order, or an empty
    #   Array when list is empty or holds fewer than k points
    # @api private
    def self.initialize_centroids(list, k, _class = Cluda::Euclidean)
      _class.validate(list)

      return [] if list.empty? || k > list.size

      list.sample(k)
    end

    # @param name [String, Symbol] candidate distance method name
    # @return [Boolean] whether the name is one of DISTANCE_METHODS
    # @api private
    def self.valid_class?(name)
      DISTANCE_METHODS.include?(name.to_s.downcase)
    end

    # Builds the result skeleton: every centroid mapped to an empty list.
    # Centroids that are equal as Hashes share a single entry.
    #
    # @param centroids [Array<Hash>]
    # @return [Hash{Hash => Array}]
    # @api private
    def self.init_output(centroids)
      centroids.each_with_object({}) do |centroid, memo|
        memo[centroid] = []
      end
    end

    # Returns the middle element of the sorted list; for an even number of
    # elements this is the upper of the two middle ones.
    #
    # @param list [Array] comparable values
    # @return [Object, nil] the median element, or nil for an empty list
    # @api private
    def self.median(list)
      list.sort[list.size / 2]
    end

    # @param points [Array<Hash>]
    # @param key [Symbol] the entry to read from every point
    # @return [Array] the value stored under key in each point
    # @api private
    def self.get_key_values(points, key)
      points.map { |point| point[key] }
    end

    # Computes the next centroid of every cluster as the per-axis median
    # of its points.
    #
    # @param output [Hash{Hash => Array<Hash>}] centroid => assigned points
    # @return [Array<Hash>] one centroid per cluster, in cluster order
    # @api private
    def self.move_centroids(output)
      output.map do |centroid, points|
        # A cluster that attracted no point has no median; keep its
        # centroid in place rather than producing nil coordinates.
        next centroid if points.empty?

        {
          x: median(get_key_values(points, :x)),
          y: median(get_key_values(points, :y))
        }
      end
    end
  end
end
data/lib/cluda.rb ADDED
@@ -0,0 +1,4 @@
1
+ $:.unshift File.expand_path('..', __FILE__ )
2
+
3
+ require 'cluda/distances/distance'
4
+ require 'cluda/kmeans'
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cluda
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Enrique Figuerola
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-29 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.11.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.11.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: CLustering Data Analysis gem
47
+ email: hard_rock15@msn.com
48
+ executables: []
49
+ extensions: []
50
+ extra_rdoc_files: []
51
+ files:
52
+ - lib/cluda.rb
53
+ - lib/cluda/kmeans.rb
54
+ - lib/cluda/distances/distance.rb
55
+ - lib/cluda/distances/euclidean.rb
56
+ - lib/cluda/distances/manhattan.rb
57
+ - lib/cluda/distances/chebyshev.rb
58
+ homepage: https://github.com/emfigo/cluda
59
+ licenses:
60
+ - MIT
61
+ post_install_message:
62
+ rdoc_options: []
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ requirements: []
78
+ rubyforge_project:
79
+ rubygems_version: 1.8.23
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: CLuDA
83
+ test_files: []