kmeans-clusterer 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +15 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1eef92b1d0554e9bd082619863534ce7f74aa64
|
4
|
+
data.tar.gz: 123a573338accb5c7c68e406e4e691bd5235300c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51dcecd3cf4a8157250d605dcffdc50a0e2d4621436232f438c62aa743d5a01cf06f5c12d01ff214808daf8c62cedb2501b33179c44060cfc0354b2c31d2f592
|
7
|
+
data.tar.gz: 31ad149b1ef15aa0aa64280f4989b7f6476c4529dbf9e15bb3c04497b5756e06438ede5c784a9ef0e179cd96f310414c5f3d80881ec048b046b7e88eef30eb50
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -87,19 +87,25 @@ class KMeansClusterer
|
|
87
87
|
opts[:points_matrix] = NMatrix.cast(data, NArray::DFLOAT)
|
88
88
|
opts[:row_norms] = opts[:points_matrix].map {|v| v**2}.sum(0)
|
89
89
|
|
90
|
-
|
90
|
+
bestrun = nil
|
91
|
+
|
92
|
+
opts[:runs].times do |i|
|
91
93
|
km = new(opts).run
|
94
|
+
|
92
95
|
if opts[:log]
|
93
96
|
puts "[#{i + 1}] #{km.iterations} iter\t#{km.runtime.round(2)}s\t#{km.error.round(2)} err"
|
94
97
|
end
|
95
|
-
|
98
|
+
|
99
|
+
if bestrun.nil? || (km.error < bestrun.error)
|
100
|
+
bestrun = km
|
101
|
+
end
|
96
102
|
end
|
97
103
|
|
98
|
-
|
104
|
+
bestrun.finish
|
99
105
|
end
|
100
106
|
|
101
107
|
|
102
|
-
attr_reader :k, :points, :clusters, :error, :iterations, :runtime
|
108
|
+
attr_reader :k, :points, :clusters, :error, :mean, :std, :iterations, :runtime
|
103
109
|
|
104
110
|
|
105
111
|
def initialize opts = {}
|
@@ -192,7 +198,7 @@ class KMeansClusterer
|
|
192
198
|
wrap_point Array.new(@points[0].dimension, 0)
|
193
199
|
end
|
194
200
|
|
195
|
-
def silhouette_score
|
201
|
+
def silhouette
|
196
202
|
return 1.0 if @k < 2
|
197
203
|
|
198
204
|
distances = distance(@centroids, @points_matrix)
|
@@ -212,6 +218,8 @@ class KMeansClusterer
|
|
212
218
|
scores.reduce(:+) / scores.length # mean score for all points
|
213
219
|
end
|
214
220
|
|
221
|
+
alias_method :silhouette_score, :silhouette
|
222
|
+
|
215
223
|
private
|
216
224
|
def wrap_point point
|
217
225
|
return point if point.is_a?(Point)
|
@@ -323,7 +331,8 @@ class KMeansClusterer
|
|
323
331
|
|
324
332
|
def get_points_for_centroid i
|
325
333
|
point_ids = @cluster_point_ids[i]
|
326
|
-
|
334
|
+
points = @points_matrix[true, point_ids]
|
335
|
+
points.empty? ? NArray.dfloat(0) : NArray.cast(points)
|
327
336
|
end
|
328
337
|
|
329
338
|
def distance x, y, yy = @row_norms
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: k-means clustering. Uses NArray for fast calculations.
|
14
14
|
email: gbuesing@gmail.com
|