kmeans-crystal 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/kmeans-crystal.rb +101 -0
  3. metadata +52 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d58aa8e75aff1be703afe7ad5c3d89ff09d17512
4
+ data.tar.gz: 51c94d22cee9f8c7f46b5a29d60b324b656bd5ae
5
+ SHA512:
6
+ metadata.gz: 6aa29a139ed0aff295ea7f17d78f22b40b1130137415cbfa6963bd20f14777df4eec21b960565988bb606b06c29a9695afc341411a4e8019e7ffd4c531ac7271
7
+ data.tar.gz: b1be6bc5b49282eb210d7d39c6ccc8f66f8dd113f644fe46ebbe2dc98a3d0b07506635fa0cc99c55cd2e5b4b0ab5ded7c2a19c62d1d07dadfc94067a8e3beb11
@@ -0,0 +1,101 @@
1
# K-means clustering over entries that expose their feature vector under a
# configurable key (e.g. a Hash such as `{ features: [1.0, 2.0] }`).
module KMeansCrystal
  # A single cluster: a centroid plus the entries currently assigned to it.
  class Cluster
    attr_reader :centroid
    attr_reader :entries
    attr_reader :name

    # @param name [String] label reported by {#output} and by Model#predict
    # @param centroid [Array<Numeric>] coordinates of the cluster centre
    # @param vector_name [Object] key used to read the feature vector from an
    #   entry via `entry[vector_name]`
    def initialize(name, centroid, vector_name)
      @name = name
      @centroid = centroid
      @entries = []
      @vector_name = vector_name
    end

    # @return [Hash] snapshot with the keys :name, :centroid and :entries
    def output
      { name: @name, centroid: @centroid, entries: @entries }
    end

    # Euclidean distance between this cluster's centroid and an entry.
    #
    # @param entry [#[]] object whose `entry[vector_name]` is indexable by
    #   the same positions as the centroid
    # @return [Float]
    def distance(entry)
      vector = entry[@vector_name]
      squared = @centroid.each_with_index.reduce(0.0) do |sum, (coordinate, i)|
        sum + (coordinate - vector[i])**2
      end
      Math.sqrt(squared)
    end

    # Computes the mean of the assigned entries' vectors. Despite the name,
    # this does not modify the cluster; the new centroid is only returned.
    #
    # @return [Array<Numeric>] the mean vector, or a copy of the current
    #   centroid when no entries are assigned
    def update_centroid
      # An empty cluster has no mean; keep its centroid instead of crashing
      # on entries[0] being nil.
      return @centroid.dup if entries.empty?

      totals = Array.new(entries.first[@vector_name].size, 0.0)
      entries.each do |entry|
        entry[@vector_name].each_with_index { |value, i| totals[i] += value }
      end
      totals.map { |total| total / entries.size }
    end
  end

  # Trains a k-means model over a fixed set of entries.
  class Model
    # @param cluster_num [Integer] number of clusters to build
    # @param entries [Array] entries to cluster; each must answer
    #   `entry[vector_name]` with its feature vector
    # @param vector_name [Object] key of the feature vector inside an entry
    # @raise [RuntimeError] when there are fewer entries than clusters
    def initialize(cluster_num, entries, vector_name = :features)
      # NOTE: the wording is kept as published; the condition actually means
      # "fewer entries than requested clusters".
      raise 'too less cluster_num to evaluate k-means' if entries.size < cluster_num

      @cluster_num = cluster_num
      @entries = entries
      @vector_name = vector_name
    end

    # Runs the k-means iterations, starting from randomly sampled entries.
    #
    # With a block, the block is called after every assignment pass with the
    # 1-based iteration number and the current cluster outputs; iteration
    # continues until the block uses `break`. Without a block, iteration
    # stops as soon as a pass leaves every centroid unchanged.
    #
    # @yieldparam iteration [Integer] 1-based iteration counter
    # @yieldparam clusters [Array<Hash>] see Cluster#output
    # @return [Array<Hash>] the final cluster outputs when no block is given
    def train
      init_centroids = @entries.sample(@cluster_num).map { |entry| entry[@vector_name] }
      @clusters = new_clusters(init_centroids)
      iteration = 0

      loop do
        @entries.each { |entry| get_min(@clusters, entry).entries << entry }

        iteration += 1
        yield(iteration, result) if block_given?

        new_centroids = get_new_centroids(@clusters)
        # Without a caller-supplied stop condition, unchanged centroids mean
        # the next pass would reproduce the same assignment.
        return result if !block_given? && new_centroids == @clusters.map(&:centroid)

        @clusters = new_clusters(new_centroids)
      end
    end

    # @return [Array<Hash>] outputs of the current clusters
    # @raise [RuntimeError] when called before {#train}
    def result
      raise 'has not been trained' if @clusters.nil?

      @clusters.map(&:output)
    end

    # @param entry [#[]] entry exposing its vector under the model's vector_name
    # @return [String] name of the nearest cluster
    # @raise [RuntimeError] when called before {#train}
    def predict(entry)
      raise 'has not been trained' if @clusters.nil?

      get_min(@clusters, entry).name
    end

    private

    # Collects the recomputed centroid of every cluster, in cluster order.
    def get_new_centroids(clusters)
      clusters.map(&:update_centroid)
    end

    # Returns the cluster closest to the entry. When several clusters are
    # equally close, the last one in the list wins.
    def get_min(centroids, entry)
      nearest = centroids.first
      nearest_distance = nearest.distance(entry)
      centroids.drop(1).each do |candidate|
        candidate_distance = candidate.distance(entry)
        next if candidate_distance > nearest_distance

        nearest = candidate
        nearest_distance = candidate_distance
      end
      nearest
    end

    # Builds empty clusters named "cluster0", "cluster1", ... for the centroids.
    def new_clusters(centroids)
      centroids.each_with_index.map do |centroid, i|
        Cluster.new("cluster#{i}", centroid, @vector_name)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kmeans-crystal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - ireullin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |-
14
+ This data-clustering library implements the k-means algorithm. With the library, you can monitor the model’s training process and end the training once the result has converged.
15
+
16
+ 這是一個分群用的library。他實作了k-means演算法。透過這個library你可以監看整個model訓練的過程,並且在結果收斂的時候結束訓練。
17
+
18
+ https://github.com/ireullin/kmeans-crystal
19
+ email:
20
+ - ireullin@gmail.com
21
+ executables: []
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - lib/kmeans-crystal.rb
26
+ homepage: https://github.com/ireullin/kmeans-crystal
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.4.6
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: With this library, you can monitor the model’s training process and end the
50
+ training if the result is converged. https://github.com/ireullin/kmeans-crystal
51
+ test_files: []
52
+ has_rdoc: