kmeans-crystal 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/kmeans-crystal.rb +101 -0
- metadata +52 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d58aa8e75aff1be703afe7ad5c3d89ff09d17512
|
4
|
+
data.tar.gz: 51c94d22cee9f8c7f46b5a29d60b324b656bd5ae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6aa29a139ed0aff295ea7f17d78f22b40b1130137415cbfa6963bd20f14777df4eec21b960565988bb606b06c29a9695afc341411a4e8019e7ffd4c531ac7271
|
7
|
+
data.tar.gz: b1be6bc5b49282eb210d7d39c6ccc8f66f8dd113f644fe46ebbe2dc98a3d0b07506635fa0cc99c55cd2e5b4b0ab5ded7c2a19c62d1d07dadfc94067a8e3beb11
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# K-means clustering with caller-controlled iteration.
#
# Entries are objects that respond to +[]+ with the configured vector name and
# return an indexable collection of numbers (presumably Hashes such as
# <tt>{ features: [1.0, 2.0] }</tt> -- confirm against callers).
module KMeansCrystal
  # One cluster: a name, a centroid and the entries currently assigned to it.
  class Cluster
    attr_reader :centroid, :entries, :name

    # @param name [String] label reported by {#output} and Model#predict
    # @param centroid [Array<Numeric>] coordinates of the cluster centre
    # @param vector_name [Object] key used to read the vector out of an entry
    def initialize(name, centroid, vector_name)
      @name = name
      @centroid = centroid
      @entries = []
      @vector_name = vector_name
    end

    # @return [Hash] snapshot with the keys :name, :centroid and :entries
    def output
      { name: @name, centroid: @centroid, entries: @entries }
    end

    # Euclidean distance between the centroid and the entry's vector.
    # Only the first +centroid.size+ components of the entry are read.
    #
    # @param entry [#[]] entry holding a vector under the configured key
    # @return [Float]
    def distance(entry)
      vector = entry[@vector_name]
      sum = 0.0
      @centroid.each_index { |i| sum += (@centroid[i] - vector[i])**2 }
      Math.sqrt(sum)
    end

    # Component-wise mean of the vectors of all assigned entries.
    #
    # @return [Array<Float>] the centroid to use for the next iteration
    def update_centroid
      # An empty cluster has no mean (and entries[0] would be nil); keep the
      # current centroid so the cluster survives into the next iteration.
      return @centroid if entries.empty?

      new_centroid = Array.new(entries[0][@vector_name].size, 0.0)
      entries.each do |entry|
        vector = entry[@vector_name]
        vector.size.times { |i| new_centroid[i] += vector[i] }
      end
      new_centroid.map! { |total| total / entries.size }
    end
  end

  # Trains k clusters over a fixed set of entries and classifies new entries.
  class Model
    # @param cluster_num [Integer] number of clusters (k)
    # @param entries [Array] training entries
    # @param vector_name [Object] key under which each entry stores its vector
    # @raise [RuntimeError] when there are fewer entries than clusters
    def initialize(cluster_num, entries, vector_name = :features)
      raise 'too less cluster_num to evaluate k-means' if entries.size < cluster_num

      @cluster_num = cluster_num
      @entries = entries
      @vector_name = vector_name
    end

    # Runs k-means iterations, starting from randomly sampled entries.
    #
    # This method never stops on its own: after every assignment pass it yields
    # and the caller is expected to +break+ out of the block when satisfied.
    # The clusters yielded last remain available through {#result}.
    #
    # @yieldparam iteration [Integer] 1-based iteration counter
    # @yieldparam clusters [Array<Hash>] Cluster#output of every cluster
    # @raise [ArgumentError] when called without a block
    def train
      # Without a block there is no way to stop the loop; fail before touching
      # any state instead of raising LocalJumpError halfway through.
      raise ArgumentError, 'train requires a block that breaks to stop training' unless block_given?

      init_centroids = @entries.sample(@cluster_num).map { |entry| entry[@vector_name] }
      @clusters = new_clusters(init_centroids)
      iteration = 0
      while true
        @entries.each { |entry| get_min(@clusters, entry).entries << entry }

        iteration += 1
        yield(iteration, @clusters.map(&:output))

        @clusters = new_clusters(get_new_centroids(@clusters))
      end
    end

    # @return [Array<Hash>] Cluster#output of every cluster from the last iteration
    # @raise [RuntimeError] when the model has not been trained
    def result
      raise 'has not been trained' if @clusters.nil?

      @clusters.map(&:output)
    end

    # @param entry [#[]] entry holding a vector under the configured key
    # @return [String] name of the cluster whose centroid is nearest to the entry
    # @raise [RuntimeError] when the model has not been trained
    def predict(entry)
      raise 'has not been trained' if @clusters.nil?

      get_min(@clusters, entry).name
    end

    private

    # Recomputes the centroid of every cluster from its assigned entries.
    def get_new_centroids(clusters)
      clusters.map(&:update_centroid)
    end

    # Returns the cluster nearest to the entry. On equal distances the cluster
    # that comes later in the list wins.
    def get_min(centroids, entry)
      min_cluster = centroids[0]
      last_distance = min_cluster.distance(entry)
      centroids.drop(1).each do |candidate|
        current_distance = candidate.distance(entry)
        next if current_distance > last_distance

        min_cluster = candidate
        last_distance = current_distance
      end
      min_cluster
    end

    # Builds fresh, empty clusters named "cluster0", "cluster1", ... around the
    # given centroids.
    def new_clusters(centroids)
      centroids.each_with_index.map do |centroid, i|
        Cluster.new("cluster#{i}", centroid, @vector_name)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kmeans-crystal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ireullin
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-06-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: |-
|
14
|
+
This data-clustering library implements the k-means algorithm. With the library, you can monitor the model’s training process and end the training once the result has converged.
|
15
|
+
|
16
|
+
這是一個分群用的library。他實作了k-means演算法。透過這個library你可以監看整個model訓練的過程,並且在結果收斂的時候結束訓練。
|
17
|
+
|
18
|
+
https://github.com/ireullin/kmeans-crystal
|
19
|
+
email:
|
20
|
+
- ireullin@gmail.com
|
21
|
+
executables: []
|
22
|
+
extensions: []
|
23
|
+
extra_rdoc_files: []
|
24
|
+
files:
|
25
|
+
- lib/kmeans-crystal.rb
|
26
|
+
homepage: https://github.com/ireullin/kmeans-crystal
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata: {}
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
requirements: []
|
45
|
+
rubyforge_project:
|
46
|
+
rubygems_version: 2.4.6
|
47
|
+
signing_key:
|
48
|
+
specification_version: 4
|
49
|
+
summary: With this library, you can monitor the model’s training process and end the
|
50
|
+
training if the result is converged. https://github.com/ireullin/kmeans-crystal
|
51
|
+
test_files: []
|
52
|
+
has_rdoc:
|