kmeans-crystal 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-crystal.rb +79 -10
  3. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92da21fd5b192c00cb575b352410fc09a0cf3649
4
- data.tar.gz: 72563e5d97d4a76e58fd128b83894510632e2fea
3
+ metadata.gz: c25646b44067f4a43cca87296ef5d04ff77272c5
4
+ data.tar.gz: 83e65a93b07dff510d80df973affc7671f565198
5
5
  SHA512:
6
- metadata.gz: 6b2e361cfd58f4d6248fd6f6d1136058b555939a2ea5ca617980e809bcab60eeb3ea77a48f662ddfea4edfbb8e7e546edc541de5a66784b5d2f35a2788516fb1
7
- data.tar.gz: 77a92b7b706f01641497976f0e9c0a092a800f358cd01cf13ce9b9ebcd3d331529d6bd45df43da2fa4caca5246adf2cf051bd07e0d357c6aecfd273eacbb5980
6
+ metadata.gz: 52d5da1622b0362238fab40aa34b64a948e381066afc90547f52747aed3220d21cb0980879790bc16bc0a0e9e685b33cef880067bd879c3fae88f17010928837
7
+ data.tar.gz: 719f3197099ad25d359d1758a48cfcd3fe7913f332d31fc931a291075ae795b555dfaa35b2968793dcb170bedb1d34f0ebc9af28dacb8aee05b79c97f2bf8e3c
@@ -1,24 +1,46 @@
1
1
  module KMeansCrystal
2
+
3
module Measure
  # Euclidean (straight-line, L2) distance.
  class Euclidean
    # @param a [Array<Numeric>] first vector; its size decides how many
    #   components are compared
    # @param b [Array<Numeric>] second vector, read at the same indices as +a+
    # @return [Float] square root of the summed squared component differences
    def self.distance(a, b)
      squared = a.each_index.inject(0.0) { |acc, i| acc + (a[i] - b[i])**2 }
      Math.sqrt(squared)
    end
  end

  # Manhattan (city-block, L1) distance.
  class Manhattan
    # @param a [Array<Numeric>] first vector; its size decides how many
    #   components are compared
    # @param b [Array<Numeric>] second vector, read at the same indices as +a+
    # @return [Float] sum of the absolute component differences
    def self.distance(a, b)
      # No square root here: the L1 norm is the plain sum of absolute
      # differences. Taking Math.sqrt of it would not be a Manhattan distance.
      a.each_index.inject(0.0) { |acc, i| acc + (a[i] - b[i]).abs }
    end
  end
end
22
+
23
+
2
24
  class Cluster
3
25
  attr_reader :centroid
4
26
  attr_reader :entries
5
27
  attr_accessor :name
6
28
 
7
# Creates an empty cluster around a starting centroid.
#
# @param name [Object] label stored in @name and exposed through the
#   +name+ accessor
# @param centroid [Array<Numeric>] starting centroid coordinates
# @param vector_name [Object] key used to read the feature vector out of
#   an entry (entry[vector_name])
# @param measure [#distance] object whose distance(a, b) is used to compare
#   the centroid with an entry's vector. Defaults to Measure::Euclidean so
#   that three-argument callers written against 1.0.1 (which always used
#   the Euclidean formula) keep working unchanged.
def initialize(name, centroid, vector_name, measure = Measure::Euclidean)
  @name = name
  @centroid = centroid
  @entries = Array.new
  @vector_name = vector_name
  @measure = measure
end
13
36
 
14
37
  def output
15
- return { name: @name, centroid: @centroid, entries: @entries }
38
+ output_entries = @entries.map{|e| e[:distance] = distance(e); e }
39
+ return { name: @name, centroid: @centroid, entries: output_entries }
16
40
  end
17
41
 
18
42
  def distance(entry)
19
- sum = 0.0
20
- @centroid.size.times{|i| sum += (@centroid[i]-entry[@vector_name][i])**2}
21
- return Math.sqrt(sum)
43
+ return @measure.distance(@centroid, entry[@vector_name])
22
44
  end
23
45
 
24
46
  def update_centroid
@@ -37,13 +59,37 @@ end
37
59
 
38
60
 
39
61
  class Model
40
# Sets up a k-means model and its initial clusters.
#
# @param cluster_num [Integer] number of clusters to build
# @param entries [Array] records to cluster; each is read with
#   entry[vector_name] to obtain its feature vector
# @param vector_name [Object, nil] optional positional form of the
#   :vector_name option, kept so that 1.0.1-style calls such as
#   Model.new(k, entries, :my_key) still work
# @param params [Hash] keyword options:
#   * :vector_name    - key of the feature vector (default :features);
#                       takes precedence over the positional argument
#   * :distance       - 'euclidean' (default) or 'manhattan'
#   * :init_centroids - 'kmeans++' (default) or 'random'
# @raise [RuntimeError] when there are fewer entries than clusters, or when
#   :distance / :init_centroids holds an unrecognised value
def initialize(cluster_num, entries, vector_name = nil, **params)
  raise 'too less cluster_num to evaluate k-means' if entries.size < cluster_num

  @cluster_num = cluster_num
  @entries = entries

  # Keyword wins over positional; fall back to :features when neither is given.
  requested_vector = params[:vector_name]
  requested_vector = vector_name if requested_vector.nil?
  @vector_name = requested_vector.nil? ? :features : requested_vector

  @measure =
    case params[:distance]
    when 'manhattan' then Measure::Manhattan
    when 'euclidean', nil then Measure::Euclidean
    else raise 'incorrect value for distance'
    end

  init_centroids =
    case params[:init_centroids]
    when 'random'
      @entries.sample(@cluster_num).map { |x| x[@vector_name] }
    when 'kmeans++', nil
      kmeans_pp(@entries, @cluster_num)
    else
      raise 'incorrect value for init_centroids'
    end

  @clusters = new_clusters(init_centroids)
end
49
95
 
@@ -96,7 +142,7 @@ class Model
96
142
  def new_clusters(centroids)
97
143
  clusters = Array.new
98
144
  centroids.each_with_index do |centroid, i|
99
- clusters << Cluster.new("cluster#{i}", centroid, @vector_name)
145
+ clusters << Cluster.new("cluster#{i}", centroid, @vector_name, @measure)
100
146
  end
101
147
  return clusters
102
148
  end
@@ -104,9 +150,32 @@ class Model
104
150
  def new_clusters_from_old(clusters)
105
151
  arr = Array.new
106
152
  clusters.each do |cluster|
107
- arr << Cluster.new(cluster.name, cluster.update_centroid, @vector_name)
153
+ arr << Cluster.new(cluster.name, cluster.update_centroid, @vector_name, @measure)
108
154
  end
109
155
  return arr
110
156
  end
157
+
158
# Picks initial centroids for the 'kmeans++' option.
#
# What the code does: it samples cluster_num feature vectors, then
# overwrites the first min(cluster_num, 2**dimension) of them with corners
# of the data's axis-aligned bounding box. Corner i uses the binary digits
# of i, most significant first, to choose min (0) or max (1) on each axis.
# Any remaining centroids stay as the randomly sampled vectors.
# NOTE(review): this is not the distance-weighted seeding usually called
# k-means++; confirm whether the name or the algorithm should change.
#
# @param entries [Array] records; entry[@vector_name] is the feature vector
# @param cluster_num [Integer] number of centroids wanted
# @return [Array<Array<Numeric>>] initial centroids, or [] for empty entries
def kmeans_pp(entries, cluster_num)
  features = entries.map { |x| x[@vector_name] }
  return [] if features.empty?

  dimension = features.first.size
  # bounds[d] == [min, max] of coordinate d over all feature vectors.
  bounds = Array.new(dimension) { |d| features.map { |f| f[d] }.minmax }

  # dup each sampled vector: the coordinate writes below must not alias the
  # entries' own feature arrays, otherwise the training data is overwritten.
  init_centroids = entries.sample(cluster_num).map { |x| x[@vector_name].dup }

  corner_count = [2**dimension, init_centroids.size].min
  corner_count.times do |i|
    offset = i.to_s(2).rjust(dimension, "0")
    dimension.times do |d|
      max_or_min = offset[d].to_i
      init_centroids[i][d] = bounds[d][max_or_min]
    end
  end

  init_centroids
end
111
180
  end
112
181
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-crystal
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ireullin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-05 00:00:00.000000000 Z
11
+ date: 2017-04-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |-
14
14
  The library for data clustering is implemented by k-means algorithm. With the library, you can monitor the model’s training process and end the training if the result is converged.
@@ -40,10 +40,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
40
40
  version: '0'
41
41
  requirements: []
42
42
  rubyforge_project:
43
- rubygems_version: 2.4.6
43
+ rubygems_version: 2.2.2
44
44
  signing_key:
45
45
  specification_version: 4
46
46
  summary: With this library, you can monitor the model’s training process and end the
47
47
  training if the result is converged.
48
48
  test_files: []
49
- has_rdoc: