kmeans-crystal 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/kmeans-crystal.rb +101 -0
- metadata +52 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d58aa8e75aff1be703afe7ad5c3d89ff09d17512
|
4
|
+
data.tar.gz: 51c94d22cee9f8c7f46b5a29d60b324b656bd5ae
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6aa29a139ed0aff295ea7f17d78f22b40b1130137415cbfa6963bd20f14777df4eec21b960565988bb606b06c29a9695afc341411a4e8019e7ffd4c531ac7271
|
7
|
+
data.tar.gz: b1be6bc5b49282eb210d7d39c6ccc8f66f8dd113f644fe46ebbe2dc98a3d0b07506635fa0cc99c55cd2e5b4b0ab5ded7c2a19c62d1d07dadfc94067a8e3beb11
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module KMeansCrystal
|
2
|
+
# One k-means cluster: a centroid plus the entries currently assigned to it.
#
# Entries are hash-like objects; the feature vector of an entry is read with
# entry[vector_name] and is indexed positionally against the centroid.
class Cluster
  # centroid: the vector this cluster was created with (never mutated here).
  # entries:  the mutable list of assigned entries; callers append with <<.
  # name:     the label given at construction time.
  attr_reader :centroid, :entries, :name

  # @param name [Object] label reported by #output
  # @param centroid [Array<Numeric>] position of the cluster centre
  # @param vector_name [Object] key used to fetch the vector from an entry
  def initialize(name, centroid, vector_name)
    @name = name
    @centroid = centroid
    @entries = []
    @vector_name = vector_name
  end

  # @return [Hash] the cluster as { name:, centroid:, entries: }
  def output
    { name: @name, centroid: @centroid, entries: @entries }
  end

  # Euclidean distance between the centroid and the entry's vector.
  # Only the first @centroid.size components of the entry are read.
  #
  # @param entry [#[]] object whose vector is stored under vector_name
  # @return [Float]
  def distance(entry)
    vector = entry[@vector_name]
    squared = (0...@centroid.size).inject(0.0) do |sum, i|
      sum + (@centroid[i] - vector[i])**2
    end
    Math.sqrt(squared)
  end

  # Component-wise mean of the vectors of all assigned entries.
  # The stored centroid is NOT modified; the new position is returned.
  #
  # An empty cluster has no mean, so a copy of the current centroid is
  # returned instead of failing on entries[0] (which is nil in that case).
  #
  # @return [Array<Numeric>] the recomputed centroid
  def update_centroid
    return @centroid.dup if @entries.empty?

    # The dimension is taken from the first entry; sums start as Floats so
    # the final division is never an integer division.
    sums = Array.new(@entries.first[@vector_name].size, 0.0)
    @entries.each do |entry|
      entry[@vector_name].each_with_index { |value, i| sums[i] += value }
    end
    sums.map { |total| total / @entries.size }
  end
end
|
35
|
+
|
36
|
+
|
37
|
+
# k-means model over a list of entries.
#
# Entries are hash-like objects whose feature vector is read with
# entry[vector_name]. Initial centroids are drawn at random from the entries.
class Model
  # @param cluster_num [Integer] number of clusters (k)
  # @param entries [Array<#[]>] the data set to cluster
  # @param vector_name [Object] key of the feature vector inside each entry
  # @raise [RuntimeError] when there are fewer entries than clusters
  def initialize(cluster_num, entries, vector_name = :features)
    # NOTE: the wording is historical; the condition means "cluster_num is
    # larger than the number of entries". Message kept for compatibility.
    raise 'too less cluster_num to evaluate k-means' if entries.size < cluster_num

    @cluster_num = cluster_num
    @entries = entries
    @vector_name = vector_name
  end

  # Runs k-means.
  #
  # With a block: after every assignment pass the block receives the
  # 1-based iteration number and the current cluster outputs. The loop never
  # stops by itself; the block ends training with `break`.
  #
  # Without a block: iterates until the recomputed centroids equal the
  # current ones, then returns the cluster outputs. (Previously this raised
  # LocalJumpError at the first yield.) Pass a block to impose a custom
  # stopping rule, e.g. an iteration cap.
  #
  # @yieldparam iteration [Integer]
  # @yieldparam clusters [Array<Hash>] see #result
  # @return [Array<Hash>] cluster outputs when run without a block
  def train
    seeds = @entries.sample(@cluster_num).map { |entry| entry[@vector_name] }
    @clusters = new_clusters(seeds)

    iteration = 0
    converged = false
    until converged
      @entries.each { |entry| get_min(@clusters, entry).entries << entry }

      iteration += 1
      yield(iteration, cluster_outputs) if block_given?

      next_centroids = get_new_centroids(@clusters)
      # Only the block-less mode stops on its own. On convergence the
      # clusters are left in place so that #result still lists their entries.
      converged = !block_given? && next_centroids == @clusters.map { |c| c.centroid }
      @clusters = new_clusters(next_centroids) unless converged
    end
    cluster_outputs
  end

  # @return [Array<Hash>] one { name:, centroid:, entries: } hash per cluster
  # @raise [RuntimeError] when called before #train
  def result
    raise 'has not been trained' if @clusters.nil?

    cluster_outputs
  end

  # @param entry [#[]] object carrying a vector under vector_name
  # @return [String] name of the nearest cluster ("cluster<index>")
  # @raise [RuntimeError] when called before #train
  def predict(entry)
    raise 'has not been trained' if @clusters.nil?

    get_min(@clusters, entry).name
  end

  private

  # Output hashes of the current clusters, in cluster order.
  def cluster_outputs
    @clusters.map { |cluster| cluster.output }
  end

  # Recomputed centroid of every cluster, in cluster order.
  def get_new_centroids(clusters)
    clusters.map { |cluster| cluster.update_centroid }
  end

  # Cluster closest to +entry+. On equal distances the LATER cluster wins
  # (a candidate is skipped only when it is strictly farther).
  def get_min(centroids, entry)
    nearest = centroids[0]
    shortest = nearest.distance(entry)
    centroids.drop(1).each do |candidate|
      candidate_distance = candidate.distance(entry)
      next if candidate_distance > shortest

      nearest = candidate
      shortest = candidate_distance
    end
    nearest
  end

  # Fresh, empty clusters named "cluster0", "cluster1", ... for the centroids.
  def new_clusters(centroids)
    centroids.each_with_index.map do |centroid, i|
      Cluster.new("cluster#{i}", centroid, @vector_name)
    end
  end
end
|
101
|
+
end
|
metadata
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kmeans-crystal
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- ireullin
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-06-29 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: |-
|
14
|
+
This library implements the k-means algorithm for data clustering. With it, you can monitor the model’s training process and end the training once the result has converged.
|
15
|
+
|
16
|
+
這是一個分群用的library。他實作了k-means演算法。透過這個library你可以監看整個model訓練的過程,並且在結果收斂的時候結束訓練。
|
17
|
+
|
18
|
+
https://github.com/ireullin/kmeans-crystal
|
19
|
+
email:
|
20
|
+
- ireullin@gmail.com
|
21
|
+
executables: []
|
22
|
+
extensions: []
|
23
|
+
extra_rdoc_files: []
|
24
|
+
files:
|
25
|
+
- lib/kmeans-crystal.rb
|
26
|
+
homepage: https://github.com/ireullin/kmeans-crystal
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata: {}
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
requirements: []
|
45
|
+
rubyforge_project:
|
46
|
+
rubygems_version: 2.4.6
|
47
|
+
signing_key:
|
48
|
+
specification_version: 4
|
49
|
+
summary: With this library, you can monitor the model’s training process and end the
|
50
|
+
training if the result is converged. https://github.com/ireullin/kmeans-crystal
|
51
|
+
test_files: []
|
52
|
+
has_rdoc:
|