cluda 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/cluda/distances/chebyshev.rb +10 -0
- data/lib/cluda/distances/distance.rb +24 -0
- data/lib/cluda/distances/euclidean.rb +10 -0
- data/lib/cluda/distances/manhattan.rb +10 -0
- data/lib/cluda/kmeans.rb +95 -0
- data/lib/cluda.rb +4 -0
- metadata +83 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module Cluda
|
2
|
+
class InvalidPoint < RuntimeError; end
|
3
|
+
|
4
|
+
class Distance
|
5
|
+
extend Math
|
6
|
+
|
7
|
+
def self.distance(x0, x)
|
8
|
+
raise ::NotImplementedError.new("You must implement distance method")
|
9
|
+
end
|
10
|
+
|
11
|
+
protected
|
12
|
+
|
13
|
+
def self.validate( data )
|
14
|
+
points = data.is_a?(Array) ? data : [ data ]
|
15
|
+
points.each do |point|
|
16
|
+
raise InvalidPoint unless point.is_a?(Hash) &&
|
17
|
+
point.include?(:x) && point.include?(:y) &&
|
18
|
+
point[:x].is_a?(Numeric) && point[:y].is_a?(Numeric)
|
19
|
+
end
|
20
|
+
|
21
|
+
points
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/cluda/kmeans.rb
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'cluda/distances/manhattan'
|
2
|
+
require 'cluda/distances/euclidean'
|
3
|
+
require 'cluda/distances/chebyshev'
|
4
|
+
|
5
|
+
module Cluda
  # Raised by Kmeans.classify when the requested distance method is not one
  # of the supported names.
  class InvalidDistanceMethod < RuntimeError; end

  # Iterative clustering of 2D points ({ x: Numeric, y: Numeric } hashes).
  # Points are assigned to their nearest centroid and each centroid is then
  # moved to the per-coordinate median of its cluster, until the centroids
  # stop changing or the iteration budget is exhausted.
  #
  # All methods are class methods and all of them are publicly callable:
  # bare `protected` / `private` markers have no effect on `def self.`
  # methods, so they are omitted here to avoid suggesting otherwise.
  class Kmeans
    # Names accepted for the `class_name` argument of `classify`.
    DISTANCE_METHODS = %w[euclidean chebyshev manhattan].freeze

    # Groups the points of `list` into at most `k` clusters.
    #
    # @param list [Array<Hash>] points to cluster
    # @param k [Integer] number of clusters requested
    # @param class_name [String, Symbol] distance method name (case-insensitive)
    # @param max_iterations [Integer] maximum number of assignment passes
    # @return [Hash{Hash => Array<Hash>}] centroid => points assigned to it;
    #   empty when no centroid can be chosen (empty list, k < 1, k > list.size)
    # @raise [InvalidDistanceMethod] if `class_name` is not supported
    def self.classify(list, k, class_name = 'euclidean', max_iterations = 50)
      raise InvalidDistanceMethod unless valid_class?(class_name)

      _class = Cluda.const_get(class_name.to_s.downcase.capitalize)
      _class.validate(list)

      centroids = initialize_centroids(list, k, _class)
      # Start from a well-defined result so the method never returns nil,
      # even when the loop below does not run.
      output = init_output(centroids)
      # Without centroids there is nothing to assign points to.
      return output if centroids.empty?

      previous_centroids = nil
      iterations = 0

      while iterations < max_iterations && previous_centroids != centroids
        output = init_output(centroids)
        list.each do |point|
          output[nearest_centroid(point, centroids, _class)] << point
        end

        iterations += 1
        previous_centroids = centroids
        centroids = move_centroids(output)
      end

      output
    end

    # Finds the centroid closest to `point`.
    #
    # @param point [Hash] the point to classify
    # @param centroids [Array<Hash>] candidate centroids
    # @param _class [Class] distance strategy responding to `validate` and
    #   `distance`
    # @return [Hash, nil] the closest centroid (the first one on ties), or
    #   nil when `centroids` is empty
    def self.nearest_centroid(point, centroids, _class = Cluda::Euclidean)
      return nil if centroids.empty?

      _class.validate(point)

      # min_by keeps the first minimum it encounters.
      centroids.min_by { |centroid| _class.distance(point, centroid) }
    end

    # Picks `k` distinct elements of `list` at random as starting centroids.
    #
    # @param list [Array<Hash>] candidate points
    # @param k [Integer] number of centroids wanted
    # @param _class [Class] distance strategy used to validate `list`
    # @return [Array<Hash>] `k` randomly chosen points, or [] when `list` is
    #   empty, `k` is smaller than 1, or `k` exceeds `list.size`
    def self.initialize_centroids(list, k, _class = Cluda::Euclidean)
      _class.validate(list)

      return [] if list.empty? || k < 1 || k > list.size

      list.sample(k)
    end

    # @param name [String, Symbol] distance method name
    # @return [Boolean] whether `name` (case-insensitive) is supported
    def self.valid_class?(name)
      DISTANCE_METHODS.include?(name.to_s.downcase)
    end

    # Builds the result skeleton: every centroid mapped to an empty list.
    # Equal centroids collapse into a single key.
    #
    # @param centroids [Array<Hash>]
    # @return [Hash{Hash => Array}]
    def self.init_output(centroids)
      centroids.each_with_object({}) do |centroid, memo|
        memo[centroid] = []
      end
    end

    # Returns the middle element of the sorted list; for an even number of
    # elements this is the upper of the two middle values.
    #
    # @param list [Array<Numeric>]
    # @return [Numeric, nil] nil when `list` is empty
    def self.median(list)
      list.sort[list.size / 2]
    end

    # @param points [Array<Hash>]
    # @param key [Symbol] coordinate to extract (:x or :y)
    # @return [Array] the value stored under `key` for every point
    def self.get_key_values(points, key)
      points.map { |point| point[key] }
    end

    # Computes the next centroid of every cluster as the per-coordinate
    # median of its points.
    #
    # @param output [Hash{Hash => Array<Hash>}] centroid => assigned points
    # @return [Array<Hash>] new centroids, in the iteration order of `output`
    def self.move_centroids(output)
      output.map do |centroid, points|
        # An empty cluster has no median; keep its centroid where it is
        # instead of producing nil coordinates.
        next centroid if points.empty?

        {
          x: median(get_key_values(points, :x)),
          y: median(get_key_values(points, :y))
        }
      end
    end
  end
end
|
data/lib/cluda.rb
ADDED
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cluda
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Enrique Figuerola
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2014-01-29 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.11.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 2.11.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
description: CLustering Data Analysis gem
|
47
|
+
email: hard_rock15@msn.com
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- lib/cluda.rb
|
53
|
+
- lib/cluda/kmeans.rb
|
54
|
+
- lib/cluda/distances/distance.rb
|
55
|
+
- lib/cluda/distances/euclidean.rb
|
56
|
+
- lib/cluda/distances/manhattan.rb
|
57
|
+
- lib/cluda/distances/chebyshev.rb
|
58
|
+
homepage: https://github.com/emfigo/cluda
|
59
|
+
licenses:
|
60
|
+
- MIT
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>='
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
requirements: []
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.8.23
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: CLuDA
|
83
|
+
test_files: []
|