kmeans-crystal 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/kmeans-crystal.rb +101 -0
  3. metadata +52 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d58aa8e75aff1be703afe7ad5c3d89ff09d17512
4
+ data.tar.gz: 51c94d22cee9f8c7f46b5a29d60b324b656bd5ae
5
+ SHA512:
6
+ metadata.gz: 6aa29a139ed0aff295ea7f17d78f22b40b1130137415cbfa6963bd20f14777df4eec21b960565988bb606b06c29a9695afc341411a4e8019e7ffd4c531ac7271
7
+ data.tar.gz: b1be6bc5b49282eb210d7d39c6ccc8f66f8dd113f644fe46ebbe2dc98a3d0b07506635fa0cc99c55cd2e5b4b0ab5ded7c2a19c62d1d07dadfc94067a8e3beb11
@@ -0,0 +1,101 @@
1
# K-means clustering over entries that expose a numeric vector under a
# configurable key (for example a Hash with a :features array).
module KMeansCrystal
  # A named group of entries gathered around a single centroid.
  class Cluster
    attr_reader :centroid, :entries, :name

    # @param name [String] label reported by #output and Model#predict
    # @param centroid [Array<Numeric>] coordinates of the cluster centre
    # @param vector_name [Object] key under which every entry stores its vector
    def initialize(name, centroid, vector_name)
      @name = name
      @centroid = centroid
      @entries = []
      @vector_name = vector_name
    end

    # @return [Hash] snapshot with the keys :name, :centroid and :entries
    def output
      { name: @name, centroid: @centroid, entries: @entries }
    end

    # Euclidean distance between the centroid and the entry's vector.
    #
    # @param entry [#[]] object answering entry[vector_name] with an Array
    # @return [Float]
    def distance(entry)
      vector = entry[@vector_name]
      squared = @centroid.each_with_index.reduce(0.0) do |acc, (coordinate, i)|
        acc + (coordinate - vector[i])**2
      end
      Math.sqrt(squared)
    end

    # Mean vector of the entries currently assigned to this cluster.
    #
    # @return [Array<Numeric>] the component-wise mean, or the current
    #   centroid when no entry is assigned
    def update_centroid
      # An empty cluster has no mean; keep the centroid where it is instead of
      # failing on entries.first being nil.
      return @centroid if entries.empty?

      totals = Array.new(entries.first[@vector_name].size, 0.0)
      entries.each do |entry|
        entry[@vector_name].each_with_index { |value, i| totals[i] += value }
      end
      totals.map { |total| total / entries.size }
    end
  end

  # Trains a set of clusters on the given entries and classifies new entries.
  class Model
    # @param cluster_num [Integer] number of clusters to build
    # @param entries [Array<#[]>] training data; each item answers
    #   entry[vector_name] with an Array of numbers
    # @param vector_name [Object] key of the vector inside each entry
    # @raise [RuntimeError] if cluster_num is below 1 or exceeds entries.size
    def initialize(cluster_num, entries, vector_name = :features)
      raise 'cluster_num must be at least 1' if cluster_num < 1
      raise 'cluster_num must not exceed the number of entries' if entries.size < cluster_num

      @cluster_num = cluster_num
      @entries = entries
      @vector_name = vector_name
    end

    # Runs the assign/update cycle, starting from randomly sampled entries.
    #
    # After every assignment step the optional block receives the 1-based
    # iteration number and the current cluster snapshots; it may `break` to
    # stop early. Training also stops by itself once an update step leaves
    # every centroid unchanged.
    #
    # @yieldparam iteration [Integer]
    # @yieldparam clusters [Array<Hash>] see Cluster#output
    # @return [Array<Hash>] the final cluster snapshots (when not broken out of)
    def train
      seeds = @entries.sample(@cluster_num).map { |entry| entry[@vector_name] }
      @clusters = new_clusters(seeds)
      iteration = 0
      loop do
        @entries.each { |entry| get_min(@clusters, entry).entries << entry }

        iteration += 1
        yield(iteration, result) if block_given?

        updated = get_new_centroids(@clusters)
        # Identical centroids would reproduce the same assignment, so the
        # current clusters (with their entries) are the final answer.
        break if updated == @clusters.map(&:centroid)

        @clusters = new_clusters(updated)
      end
      result
    end

    # @return [Array<Hash>] snapshot of every cluster, see Cluster#output
    # @raise [RuntimeError] if #train has not been called yet
    def result
      raise 'has not been trained' if @clusters.nil?

      @clusters.map(&:output)
    end

    # @param entry [#[]] object answering entry[vector_name] with an Array
    # @return [String] name of the cluster whose centroid is closest
    # @raise [RuntimeError] if #train has not been called yet
    def predict(entry)
      raise 'has not been trained' if @clusters.nil?

      get_min(@clusters, entry).name
    end

    private

    # Recomputed centroid of every cluster, in cluster order.
    def get_new_centroids(clusters)
      clusters.map(&:update_centroid)
    end

    # Cluster closest to the entry; on equal distances the later cluster wins.
    def get_min(clusters, entry)
      nearest = clusters.first
      shortest = nearest.distance(entry)
      clusters.drop(1).each do |cluster|
        current = cluster.distance(entry)
        next if current > shortest

        nearest = cluster
        shortest = current
      end
      nearest
    end

    # Fresh, empty clusters named "cluster0", "cluster1", ... for the centroids.
    def new_clusters(centroids)
      centroids.each_with_index.map do |centroid, i|
        Cluster.new("cluster#{i}", centroid, @vector_name)
      end
    end
  end
end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kmeans-crystal
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - ireullin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-06-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |-
14
+ The library for data clustering is implemented by the k-means algorithm. With the library, you can monitor the model’s training process and end the training once the result has converged.
15
+
16
+ 這是一個分群用的library。他實作了k-means演算法。透過這個library你可以監看整個model訓練的過程,並且在結果收斂的時候結束訓練。
17
+
18
+ https://github.com/ireullin/kmeans-crystal
19
+ email:
20
+ - ireullin@gmail.com
21
+ executables: []
22
+ extensions: []
23
+ extra_rdoc_files: []
24
+ files:
25
+ - lib/kmeans-crystal.rb
26
+ homepage: https://github.com/ireullin/kmeans-crystal
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.4.6
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: With this library, you can monitor the model’s training process and end the
50
+ training if the result is converged. https://github.com/ireullin/kmeans-crystal
51
+ test_files: []
52
+ has_rdoc: