kmeans-crystal 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-crystal.rb +79 -10
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92da21fd5b192c00cb575b352410fc09a0cf3649
4
- data.tar.gz: 72563e5d97d4a76e58fd128b83894510632e2fea
3
+ metadata.gz: c25646b44067f4a43cca87296ef5d04ff77272c5
4
+ data.tar.gz: 83e65a93b07dff510d80df973affc7671f565198
5
5
  SHA512:
6
- metadata.gz: 6b2e361cfd58f4d6248fd6f6d1136058b555939a2ea5ca617980e809bcab60eeb3ea77a48f662ddfea4edfbb8e7e546edc541de5a66784b5d2f35a2788516fb1
7
- data.tar.gz: 77a92b7b706f01641497976f0e9c0a092a800f358cd01cf13ce9b9ebcd3d331529d6bd45df43da2fa4caca5246adf2cf051bd07e0d357c6aecfd273eacbb5980
6
+ metadata.gz: 52d5da1622b0362238fab40aa34b64a948e381066afc90547f52747aed3220d21cb0980879790bc16bc0a0e9e685b33cef880067bd879c3fae88f17010928837
7
+ data.tar.gz: 719f3197099ad25d359d1758a48cfcd3fe7913f332d31fc931a291075ae795b555dfaa35b2968793dcb170bedb1d34f0ebc9af28dacb8aee05b79c97f2bf8e3c
@@ -1,24 +1,46 @@
1
1
  module KMeansCrystal
2
+
3
# Distance measures. Each class exposes `distance(a, b)` for two vectors
# that are indexed by position; iteration runs over the indices of `a`.
module Measure
  # Euclidean distance.
  class Euclidean
    # Returns the square root of the summed squared component differences.
    def self.distance(a, b)
      squared = a.each_index.inject(0.0) { |sum, i| sum + (a[i] - b[i])**2 }
      Math.sqrt(squared)
    end
  end

  # Manhattan distance.
  class Manhattan
    # Returns the sum of the absolute component differences.
    # No square root is taken: that belongs to the Euclidean measure only.
    def self.distance(a, b)
      a.each_index.inject(0.0) { |sum, i| sum + (a[i] - b[i]).abs }
    end
  end
end
22
+
23
+
2
24
  class Cluster
3
25
  attr_reader :centroid
4
26
  attr_reader :entries
5
27
  attr_accessor :name
6
28
 
7
- def initialize(name, centroid, vector_name)
29
# Builds a cluster around an initial centroid, with no entries yet.
#
# name        - label stored in @name
# centroid    - vector stored in @centroid
# vector_name - key used to read the vector out of an entry
# measure     - object responding to `distance(a, b)`. Defaults to
#               Measure::Euclidean, the metric that was hard-wired before
#               the measure became configurable, so three-argument callers
#               keep working.
def initialize(name, centroid, vector_name, measure = Measure::Euclidean)
  @name = name
  @centroid = centroid
  @entries = []
  @vector_name = vector_name
  @measure = measure
end
13
36
 
14
37
  def output
15
- return { name: @name, centroid: @centroid, entries: @entries }
38
+ output_entries = @entries.map{|e| e[:distance] = distance(e); e }
39
+ return { name: @name, centroid: @centroid, entries: output_entries }
16
40
  end
17
41
 
18
42
  def distance(entry)
19
- sum = 0.0
20
- @centroid.size.times{|i| sum += (@centroid[i]-entry[@vector_name][i])**2}
21
- return Math.sqrt(sum)
43
+ return @measure.distance(@centroid, entry[@vector_name])
22
44
  end
23
45
 
24
46
  def update_centroid
@@ -37,13 +59,37 @@ end
37
59
 
38
60
 
39
61
  class Model
40
- def initialize(cluster_num, entries, vector_name = :features)
62
# Sets up a k-means model over `entries` and builds the initial clusters.
#
# cluster_num - number of clusters; must not exceed entries.size
# entries     - collection of entries, each indexed by the vector name
# params      - optional settings:
#   :vector_name    - key of the vector in each entry (default :features)
#   :distance       - 'euclidean' (default) or 'manhattan'
#   :init_centroids - 'kmeans++' (default) or 'random'
#   :distance and :init_centroids may be given as Strings or Symbols.
#
# Raises RuntimeError when there are fewer entries than clusters, or when
# :distance / :init_centroids holds an unsupported value.
def initialize(cluster_num, entries, **params)
  raise 'too less cluster_num to evaluate k-means' if entries.size < cluster_num

  @cluster_num = cluster_num
  @entries = entries

  @vector_name = params[:vector_name].nil? ? :features : params[:vector_name]

  # Symbols are normalised to Strings so `distance: :manhattan` works too.
  distance = params[:distance]
  distance = distance.to_s if distance.is_a?(Symbol)
  @measure = case distance
             when 'manhattan' then Measure::Manhattan
             when 'euclidean', nil then Measure::Euclidean
             else raise 'incorrect value for distance'
             end

  strategy = params[:init_centroids]
  strategy = strategy.to_s if strategy.is_a?(Symbol)
  init_centroids = case strategy
                   when 'random'
                     @entries.sample(@cluster_num).map { |x| x[@vector_name] }
                   when 'kmeans++', nil
                     kmeans_pp(@entries, @cluster_num)
                   else
                     raise 'incorrect value for init_centroids'
                   end

  @clusters = new_clusters(init_centroids)
end
49
95
 
@@ -96,7 +142,7 @@ class Model
96
142
  def new_clusters(centroids)
97
143
  clusters = Array.new
98
144
  centroids.each_with_index do |centroid, i|
99
- clusters << Cluster.new("cluster#{i}", centroid, @vector_name)
145
+ clusters << Cluster.new("cluster#{i}", centroid, @vector_name, @measure)
100
146
  end
101
147
  return clusters
102
148
  end
@@ -104,9 +150,32 @@ class Model
104
150
  def new_clusters_from_old(clusters)
105
151
  arr = Array.new
106
152
  clusters.each do |cluster|
107
- arr << Cluster.new(cluster.name, cluster.update_centroid, @vector_name)
153
+ arr << Cluster.new(cluster.name, cluster.update_centroid, @vector_name, @measure)
108
154
  end
109
155
  return arr
110
156
  end
157
+
158
# Chooses the initial centroids for the model.
#
# Starts from `cluster_num` randomly sampled vectors, then replaces the
# first min(cluster_num, 2**dimension) of them with corners of the data's
# bounding box: corner i takes, per dimension, the minimum or the maximum
# according to the bits of i (0 -> min, 1 -> max, most significant first).
#
# NOTE(review): despite the name, no distance-weighted sampling is done
# here, so this is not the standard k-means++ seeding.
#
# The sampled vectors are copied before being overwritten; without the
# copy the corner values would be written into the callers' entries.
#
# Returns an Array of centroid vectors ([] when `entries` is empty).
def kmeans_pp(entries, cluster_num)
  features = entries.map { |x| x[@vector_name] }
  return [] if features.empty?

  dimension = features.first.size
  # bounds[d] == [min, max] of component d across all vectors
  bounds = Array.new(dimension) { |d| features.map { |f| f[d] }.minmax }

  init_centroids = entries.sample(cluster_num).map { |x| x[@vector_name].dup }
  corner_count = [2**dimension, init_centroids.size].min
  corner_count.times do |i|
    bits = i.to_s(2).rjust(dimension, '0')
    dimension.times { |d| init_centroids[i][d] = bounds[d][bits[d].to_i] }
  end
  init_centroids
end
111
180
  end
112
181
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-crystal
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ireullin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-05 00:00:00.000000000 Z
11
+ date: 2017-04-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |-
14
14
  The library for data clustering is implemented by the k-means algorithm. With the library, you can monitor the model’s training process and end the training if the result is converged.
@@ -40,10 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  version: '0'
41
41
  requirements: []
42
42
  rubyforge_project:
43
- rubygems_version: 2.4.6
43
+ rubygems_version: 2.2.2
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: With this library, you can monitor the model’s training process and end the
47
47
  training if the result is converged.
48
48
  test_files: []
49
- has_rdoc: