cluda 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 84fc5617e8ce06d8b83eec5c0e6bfc1ea7cf1c43246f5040078324cf754089c7
4
+ data.tar.gz: 812430fa9d168cdd552b059ffaa2933e930a3d461d28d9e2991a36bf1e44a42c
5
+ SHA512:
6
+ metadata.gz: ff054734dc37f4a806dc3a2635e26eec634138abd6a1ceca982b6c54bc6910be36b2ccf00b5be6d7636e56ebf82b6ca122db783f8529ed8f972fdb3b944c10dd
7
+ data.tar.gz: b9e3fd5d71c2db328303288e17991cd892912f24e85fef0c3f0555a0bd06d42a35bb48ee609423b829f10aa015756946136052cd7fb8c04f5f0e0b67010b8ed0
@@ -1,5 +1,7 @@
1
- $:.unshift File.expand_path('..', __FILE__ )
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift File.expand_path(__dir__)
2
4
 
3
5
  require 'cluda/distances/distance'
4
6
  require 'cluda/cluda_common'
5
- require 'cluda/kmeans'
7
+ require 'cluda/kmeans'
@@ -1,79 +1,78 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cluda
4
+ module_function
5
+
2
6
  class InvalidPoint < RuntimeError; end
3
7
  class InvalidCentroid < RuntimeError; end
4
8
  class InvalidSmartPoint < RuntimeError; end
5
9
  class InvalidDistanceMethod < RuntimeError; end
6
-
10
+
7
11
  # For an output given by CluDA calculate the means for each centroid
8
12
  #
9
- #Example:
13
+ # Example:
10
14
  # > clusters = {{:x=>2, :y=>2}=>
11
15
  # [{:x=>1, :y=>1, :distance=>1.4142135623730951},
12
16
  # {:x=>2, :y=>1, :distance=>1.0},
13
17
  # {:x=>1, :y=>2, :distance=>1.0},
14
18
  # {:x=>2, :y=>2, :distance=>0.0}]}
15
19
  # > Cluda.median_for_centroids(clusters)
16
- #Arguments:
20
+ # Arguments:
17
21
  # points: ( Hash )
18
- def self.median_for_centroids ( points )
19
- points.keys.each do | centroid |
20
- validate_smart_points( points[centroid] )
22
+ def median_for_centroids(points)
23
+ points.each_key do |centroid|
24
+ validate_smart_points(points[centroid])
21
25
  end
22
26
 
23
27
  points.keys.map do |centroid|
24
- centroid.merge( median: median_for_centroid( centroid, points ) )
28
+ centroid.merge(median: median_for_centroid(centroid, points))
25
29
  end
26
30
  end
27
-
28
- protected
29
-
30
- def self.valid_class?( name )
31
- ['euclidean', 'chebyshev', 'manhattan'].include?( name.downcase )
31
+
32
+ def valid_class?(name)
33
+ %w[euclidean chebyshev manhattan].include?(name.downcase)
32
34
  end
33
35
 
34
- def self.validate_centroids( centroids )
35
- centroids = centroids.is_a?(Array) ? centroids : [ centroids ]
36
-
36
+ def validate_centroids(centroids)
37
+ centroids = centroids.is_a?(Array) ? centroids : [centroids]
38
+
37
39
  validate(centroids)
38
-
40
+
39
41
  centroids.each do |point|
40
- raise InvalidCentroid unless point.include?(:median)
42
+ raise InvalidCentroid unless point.include?(:median)
41
43
  end
42
44
 
43
45
  centroids
44
46
  end
45
-
46
- def self.validate_smart_points( points )
47
- points = points.is_a?(Array) ? points : [ points ]
48
-
49
- validate( points )
50
-
47
+
48
+ def validate_smart_points(points)
49
+ points = points.is_a?(Array) ? points : [points]
50
+
51
+ validate(points)
52
+
51
53
  points.each do |point|
52
- raise InvalidSmartPoint unless point.include?(:distance)
54
+ raise InvalidSmartPoint unless point.include?(:distance)
53
55
  end
54
56
 
55
57
  points
56
58
  end
57
-
58
- def self.validate( data )
59
- points = data.is_a?(Array) ? data : [ data ]
60
- points.each do |point|
61
- raise InvalidPoint unless point.is_a?(Hash) &&
62
- point.include?(:x) && point.include?(:y) &&
63
- point[:x].is_a?(Numeric) && point[:y].is_a?(Numeric)
64
- end
65
59
 
66
- points
60
+ def validate(data)
61
+ data = [data] unless data.is_a?(Array)
62
+
63
+ data.each do |point|
64
+ raise InvalidPoint unless point.is_a?(Hash) && point[:x].is_a?(Numeric) && point[:y].is_a?(Numeric)
65
+ end
67
66
  end
68
67
 
69
- def self.median_for_centroid( centroid, points )
70
- median( points[centroid].map{ |point| point[:distance] } )
68
+ def median_for_centroid(centroid, points)
69
+ median(points[centroid].map { |point| point[:distance] })
71
70
  end
72
71
 
73
- def self.median( list )
72
+ def median(list)
74
73
  sorted_list = list.sort
75
74
  len = list.size
76
75
 
77
- sorted_list[( (len / 2 ) + 0.5 ).floor]
76
+ sorted_list[((len / 2) + 0.5).floor]
78
77
  end
79
78
  end
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cluda
2
- class Chebyshev < Distance
3
-
4
+ class Chebyshev < Distance
4
5
  def self.distance(x0, x)
5
- Cluda.validate( [x0,x] )
6
+ Cluda.validate([x0, x])
6
7
 
7
- [ (x0[:x] - x[:x]).abs, (x0[:y] - x[:y]).abs ].max
8
+ [(x0[:x] - x[:x]).abs, (x0[:y] - x[:y]).abs].max
8
9
  end
9
10
  end
10
11
  end
@@ -1,9 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cluda
2
- class Distance
4
+ class Distance
3
5
  extend Math
4
-
5
- def self.distance(x0, x)
6
- raise ::NotImplementedError.new("You must implement distance method")
6
+
7
+ def self.distance(_x0, _x)
8
+ raise ::NotImplementedError, 'You must implement distance method'
7
9
  end
8
10
  end
9
11
  end
@@ -1,10 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cluda
2
- class Euclidean < Distance
3
-
4
+ class Euclidean < Distance
4
5
  def self.distance(x0, x)
5
- Cluda.validate( [x0,x] )
6
+ Cluda.validate([x0, x])
6
7
 
7
- sqrt( (x0[:x] - x[:x]) ** 2 + (x0[:y] - x[:y]) ** 2 )
8
+ sqrt((x0[:x] - x[:x])**2 + (x0[:y] - x[:y])**2)
8
9
  end
9
10
  end
10
11
  end
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Cluda
2
- class Manhattan < Distance
3
-
4
+ class Manhattan < Distance
4
5
  def self.distance(x0, x)
5
- Cluda.validate( [x0,x] )
6
+ Cluda.validate([x0, x])
6
7
 
7
8
  (x0[:x] - x[:x]).abs + (x0[:y] - x[:y]).abs
8
9
  end
@@ -1,135 +1,169 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'cluda/distances/manhattan'
2
4
  require 'cluda/distances/euclidean'
3
5
  require 'cluda/distances/chebyshev'
4
6
 
5
7
  module Cluda
6
8
  class Kmeans
7
-
8
- DEFAULT_OPTS = { k: 1,
9
- centroids: nil,
10
- distance_method: 'euclidean',
9
+ DEFAULT_OPTS = { k: 1,
10
+ centroids: nil,
11
+ distance_method: 'euclidean',
11
12
  be_smart: false,
12
13
  margin_distance_percentage: 0,
13
- max_iterations: 50 }
14
-
15
- #Classify the points using KMeans as the clustering algorithm
16
- #
17
- #Example:
18
- # >> points = [ { x: 1, y: 1}, { x: 2, y: 1}, { x: 1, y: 2}, { x: 2, y: 2}, { x: 4, y: 6}, { x: 5, y: 7}, { x: 5, y: 6}, { x: 5, y: 5}, { x: 6, y: 6}, { x: 6, y: 5}]
19
- # >> Cluda::Kmeans.classify( points, k: 1, distance_method: 'euclidean', be_smart: true, max_iterations: 50)
20
- #Arguments:
21
- # list: (Array [Hash] )
22
- # k: (Numeric) *optional*
23
- # centroids: (Array) *optional*
24
- # distance_method: (String) *optional*
25
- # be_smart: (Boolean) *optional* [If you want CluDA to be smart you have to specify the centroids ]
26
- # margin_distance_percentage: (Numeric) *optional* [Between 0 and 1]
27
- # max_iterations: (Numeric) *optional*
28
- def self.classify( list, opts = {} )
29
- @opts = DEFAULT_OPTS.merge(opts)
30
-
31
- raise Cluda::InvalidDistanceMethod unless Cluda::valid_class?(@opts[:distance_method])
32
-
33
- _class = Cluda.const_get( @opts[:distance_method].downcase.capitalize )
34
-
35
- Cluda.validate( list )
36
- Cluda.validate_centroids( @opts[:centroids] ) if @opts[:be_smart]
37
-
38
- iter = 1
39
- max_iterations = @opts[:max_iterations]
40
- centroids = @opts[:centroids].nil? || @opts[:centroids].empty? ? initialize_centroids( list , @opts[:k]) : process_centroids( @opts[:centroids] )
41
- previous_centroids = nil
42
- smart_clustering = @opts[:be_smart]
43
- margin_distance_percentage = @opts[:margin_distance_percentage]
44
-
45
- while (iter < max_iterations) && (previous_centroids != centroids)
46
- output = init_output(centroids)
47
- margin = smart_clustering ? @median_centroid * margin_distance_percentage : 0
48
-
49
- list.each do |point|
50
- centroid, distance = nearest_centroid(point, centroids, _class)
51
-
52
- if smart_clustering && distance > ( @median_centroid + margin )
53
- @median_centroid = distance
54
- centroids << point
55
- create_centroid(point, output)
56
- centroid = point
57
- distance = 0
14
+ max_iterations: 50 }.freeze
15
+
16
+ class << self
17
+ # Classify the points using KMeans as the clustering algorithm
18
+ #
19
+ # Example:
20
+ # >> points = [
21
+ # { x: 1, y: 1},
22
+ # { x: 2, y: 1},
23
+ # { x: 1, y: 2},
24
+ # { x: 2, y: 2},
25
+ # { x: 4, y: 6},
26
+ # { x: 5, y: 7},
27
+ # { x: 5, y: 6},
28
+ # { x: 5, y: 5},
29
+ # { x: 6, y: 6},
30
+ # { x: 6, y: 5}
31
+ # ]
32
+ # >> Cluda::Kmeans.classify( points, k: 1, distance_method: 'euclidean', be_smart: true, max_iterations: 50)
33
+ # Arguments:
34
+ # list: (Array [Hash] )
35
+ # k: (Numeric) *optional*
36
+ # centroids: (Array) *optional*
37
+ # distance_method: (String) *optional*
38
+ # [If you want CluDA to be smart you have to specify the centroids ]
39
+ # be_smart: (Boolean) *optional*
40
+ # margin_distance_percentage: (Numeric) *optional* [Between 0 and 1]
41
+ # max_iterations: (Numeric) *optional*
42
+ def classify(list, opts = {})
43
+ Cluda.validate(list)
44
+ Cluda.validate_centroids(opts[:centroids]) if opts[:be_smart]
45
+
46
+ config = generate_config(list, opts)
47
+
48
+ raise Cluda::InvalidDistanceMethod unless Cluda.valid_class?(config[:distance_method])
49
+
50
+ calculate_clusters(list, **config)
51
+ end
52
+
53
+ def generate_config(list, opts)
54
+ config = DEFAULT_OPTS.merge(opts)
55
+
56
+ centroids_present?(config) ? process_centroids(config) : initialize_centroids(list, config)
57
+
58
+ config[:margin] = config[:be_smart] ? config[:median_centroid] * config[:margin_distance_percentage] : 0
59
+
60
+ config
61
+ end
62
+
63
+ def process_centroids(config)
64
+ config[:centroids].each do |point|
65
+ if config[:median_centroid].nil? || config[:median_centroid] < point[:median]
66
+ config[:median_centroid] = point[:median]
58
67
  end
59
68
 
60
- output[centroid] << point.merge( distance: distance )
69
+ point.delete_if { |k, _| !%i[x y].include? k }
61
70
  end
62
71
 
63
- iter += 1
64
- previous_centroids = centroids
65
- centroids = move_centroids( output )
72
+ config
66
73
  end
67
74
 
68
- output
69
- end
75
+ def initialize_centroids(list, config)
76
+ return config if list.empty? || config[:k] > list.size
77
+
78
+ config[:centroids] = list.shuffle(random: Random.new(rand(0...config[:k])))[0...config[:k]]
79
+
80
+ config
81
+ end
82
+
83
+ def nearest_centroid(point, centroids, klass = Cluda::Euclidean)
84
+ return nil if centroids.empty?
85
+
86
+ Cluda.validate(point)
70
87
 
71
- protected
72
-
73
- def self.nearest_centroid(point, centroids, _class = Cluda::Euclidean )
74
- return nil if centroids.empty?
75
-
76
- Cluda.validate( point )
77
-
78
- nearest_centroid = centroids[0]
79
- min_distance = _class.distance(point, nearest_centroid)
80
-
81
- centroids.each do |centroid|
82
- new_distance = _class.distance(point, centroid)
83
- if new_distance < min_distance
84
- min_distance = new_distance
85
- nearest_centroid = centroid
88
+ nearest_centroid = centroids[0]
89
+ min_distance = klass.distance(point, nearest_centroid)
90
+
91
+ centroids.each do |centroid|
92
+ new_distance = klass.distance(point, centroid)
93
+ if new_distance < min_distance
94
+ min_distance = new_distance
95
+ nearest_centroid = centroid
96
+ end
86
97
  end
98
+
99
+ [nearest_centroid, min_distance]
87
100
  end
88
101
 
89
- [nearest_centroid, min_distance]
90
- end
102
+ private
91
103
 
92
- def self.initialize_centroids( list , k )
93
- Cluda.validate( list )
104
+ def calculate_clusters(list, centroids:, distance_method:, **config)
105
+ cluster = {}
94
106
 
95
- return [] if list.empty? || k > list.size
107
+ previous_centroids = nil
108
+ klass = Cluda.const_get(distance_method.downcase.capitalize)
96
109
 
97
- list.shuffle( random: Random.new.rand(0...k) )[0...k]
98
- end
110
+ config[:max_iterations].times do
111
+ break if previous_centroids == centroids
99
112
 
100
- private
113
+ cluster = assign_points_to_centroids(list, centroids, klass, config)
101
114
 
102
- def self.init_output(centroids)
103
- centroids.each_with_object({}) do |centroid, memo|
104
- memo[centroid] = []
115
+ previous_centroids = centroids
116
+ centroids = move_centroids(cluster)
117
+ end
118
+
119
+ cluster
105
120
  end
106
- end
107
121
 
108
- def self.create_centroid( centroid, output )
109
- output[centroid] = []
110
- end
122
+ def centroids_present?(opts)
123
+ !(opts[:centroids].nil? || opts[:centroids].empty?)
124
+ end
125
+
126
+ def init_cluster(centroids)
127
+ centroids.each_with_object({}) do |centroid, memo|
128
+ memo[centroid] = []
129
+ end
130
+ end
111
131
 
112
- def self.process_centroids(centroids)
113
- centroids.each_with_object([]) do |point, memo|
114
- @median_centroid = point[:median] if @median_centroid.nil? || @median_centroid < point[:median]
115
-
116
- memo << { x: point[:x], y: point[:y] }
132
+ def create_centroid(centroid, output)
133
+ output[centroid] = []
117
134
  end
118
- end
119
135
 
120
- def self.get_key_values( points, key )
121
- points.map { |point| point[key] }
122
- end
123
-
124
- def self.move_centroids( output )
125
- output.map do |(key, value)|
126
- unless value.empty?
127
- x = Cluda.median( get_key_values(value, :x) )
128
- y = Cluda.median( get_key_values(value, :y) )
129
-
136
+ def get_key_values(points, key)
137
+ points.map { |point| point[key] }
138
+ end
139
+
140
+ def move_centroids(output)
141
+ output.map do |(_key, value)|
142
+ next if value.empty?
143
+
144
+ x = Cluda.median(get_key_values(value, :x))
145
+ y = Cluda.median(get_key_values(value, :y))
146
+
130
147
  { x: x, y: y }
148
+ end.compact
149
+ end
150
+
151
+ def assign_points_to_centroids(list, centroids, klass, config)
152
+ list.each_with_object({}) do |point, cluster|
153
+ centroid, distance = nearest_centroid(point, centroids, klass)
154
+
155
+ if config[:be_smart] && distance > (config[:median_centroid] + config[:margin])
156
+ config[:median_centroid] = distance
157
+ centroids << point
158
+ create_centroid(point, cluster)
159
+ centroid = point
160
+ distance = 0
161
+ end
162
+
163
+ cluster[centroid] ||= []
164
+ cluster[centroid] << point.merge(distance: distance)
131
165
  end
132
- end.compact
166
+ end
133
167
  end
134
168
  end
135
169
  end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cluda
4
+ VERSION = '0.1.0'
5
+ end
metadata CHANGED
@@ -1,84 +1,92 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cluda
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
5
- prerelease:
4
+ version: 0.1.0
6
5
  platform: ruby
7
6
  authors:
8
- - Enrique Figuerola
7
+ - Enrique M Figuerola Gomez
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-02-06 00:00:00.000000000 Z
11
+ date: 2020-03-19 00:00:00.000000000 Z
13
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '12.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '12.3'
14
27
  - !ruby/object:Gem::Dependency
15
28
  name: rspec
16
29
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
30
  requirements:
19
- - - ! '>='
31
+ - - "~>"
20
32
  - !ruby/object:Gem::Version
21
- version: 2.11.0
33
+ version: '3.8'
22
34
  type: :development
23
35
  prerelease: false
24
36
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
37
  requirements:
27
- - - ! '>='
38
+ - - "~>"
28
39
  - !ruby/object:Gem::Version
29
- version: 2.11.0
40
+ version: '3.8'
30
41
  - !ruby/object:Gem::Dependency
31
- name: rake
42
+ name: rubocop
32
43
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
44
  requirements:
35
- - - ! '>='
45
+ - - "~>"
36
46
  - !ruby/object:Gem::Version
37
- version: '0'
47
+ version: '0.70'
38
48
  type: :development
39
49
  prerelease: false
40
50
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
51
  requirements:
43
- - - ! '>='
52
+ - - "~>"
44
53
  - !ruby/object:Gem::Version
45
- version: '0'
54
+ version: '0.70'
46
55
  description: CLustering Data Analysis gem
47
- email: hard_rock15@msn.com
56
+ email: me@emfigo.com
48
57
  executables: []
49
58
  extensions: []
50
59
  extra_rdoc_files: []
51
60
  files:
52
61
  - lib/cluda.rb
53
62
  - lib/cluda/cluda_common.rb
54
- - lib/cluda/kmeans.rb
63
+ - lib/cluda/distances/chebyshev.rb
55
64
  - lib/cluda/distances/distance.rb
56
65
  - lib/cluda/distances/euclidean.rb
57
66
  - lib/cluda/distances/manhattan.rb
58
- - lib/cluda/distances/chebyshev.rb
67
+ - lib/cluda/kmeans.rb
68
+ - lib/cluda/version.rb
59
69
  homepage: https://github.com/emfigo/cluda
60
70
  licenses:
61
71
  - MIT
72
+ metadata: {}
62
73
  post_install_message:
63
74
  rdoc_options: []
64
75
  require_paths:
65
76
  - lib
66
77
  required_ruby_version: !ruby/object:Gem::Requirement
67
- none: false
68
78
  requirements:
69
- - - ! '>='
79
+ - - ">="
70
80
  - !ruby/object:Gem::Version
71
- version: '0'
81
+ version: '2.3'
72
82
  required_rubygems_version: !ruby/object:Gem::Requirement
73
- none: false
74
83
  requirements:
75
- - - ! '>='
84
+ - - ">="
76
85
  - !ruby/object:Gem::Version
77
86
  version: '0'
78
87
  requirements: []
79
- rubyforge_project:
80
- rubygems_version: 1.8.23
88
+ rubygems_version: 3.0.3
81
89
  signing_key:
82
- specification_version: 3
90
+ specification_version: 4
83
91
  summary: CLuDA
84
92
  test_files: []