kmeans-clusterer 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +16 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef3362502d8b947a79154f0c38f00941399f3192
|
4
|
+
data.tar.gz: 28d240da6a683201cf5f36c13cac2df5c21c31d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a2ee8ee21cd05b6ad0cc16e6db453fdd98bbba71506160b8396d5deff844a3ef7319e20895da191d7bd2562c69c407ee491fa4371c796acc6f2a8082b2ee171
|
7
|
+
data.tar.gz: 98ccfaeb50eb586145853d5da8bffa8b72447c6621431a9663493aaa67867119d8242837ad365cdc6e64e1fb8f3ea5c3b46b5e76d1af577bb2f5effe5465dd3a
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -15,11 +15,11 @@ class KMeansClusterer
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.scale data, mean = nil, std = nil, typecode = nil
|
18
|
-
data = NArray.
|
18
|
+
data = NArray.ref(data)
|
19
19
|
mean ||= self.mean(data)
|
20
20
|
std ||= self.std(data)
|
21
21
|
data = (data - mean) / std
|
22
|
-
[data, mean, std]
|
22
|
+
[NMatrix.ref(data), mean, std]
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
@@ -77,13 +77,15 @@ class KMeansClusterer
|
|
77
77
|
opts[:k] = k
|
78
78
|
opts[:typecode] = TYPECODE[opts[:float_precision]]
|
79
79
|
|
80
|
+
data = NMatrix.cast data, opts[:typecode]
|
81
|
+
|
80
82
|
if opts[:scale_data]
|
81
83
|
data, mean, std = Scaler.scale(data, nil, nil, opts[:typecode])
|
82
84
|
opts[:mean] = mean
|
83
85
|
opts[:std] = std
|
84
86
|
end
|
85
87
|
|
86
|
-
opts[:points_matrix] =
|
88
|
+
opts[:points_matrix] = data
|
87
89
|
opts[:row_norms] = opts[:points_matrix].map {|v| v**2}.sum(0)
|
88
90
|
|
89
91
|
bestrun = nil
|
@@ -144,7 +146,7 @@ class KMeansClusterer
|
|
144
146
|
updated_centroids = []
|
145
147
|
|
146
148
|
@k.times do |i|
|
147
|
-
centroid = NArray.
|
149
|
+
centroid = NArray.ref(@centroids[true, i].flatten)
|
148
150
|
point_ids = @cluster_point_ids[i]
|
149
151
|
|
150
152
|
if point_ids.empty?
|
@@ -179,8 +181,8 @@ class KMeansClusterer
|
|
179
181
|
end
|
180
182
|
|
181
183
|
def predict data
|
182
|
-
data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
|
183
184
|
data = NMatrix.cast(data, @typecode)
|
185
|
+
data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
|
184
186
|
distances = distance(@centroids, data, nil)
|
185
187
|
data.shape[1].times.map do |i|
|
186
188
|
distances[i, true].sort_index[0] # index of closest cluster
|
@@ -220,6 +222,10 @@ class KMeansClusterer
|
|
220
222
|
|
221
223
|
alias_method :silhouette_score, :silhouette
|
222
224
|
|
225
|
+
def inspect
|
226
|
+
%{#<#{self.class.name} k:#{@k} iterations:#{@iterations} error:#{@error} runtime:#{@runtime}>}
|
227
|
+
end
|
228
|
+
|
223
229
|
private
|
224
230
|
def wrap_point point
|
225
231
|
return point if point.is_a?(Point)
|
@@ -289,14 +295,14 @@ class KMeansClusterer
|
|
289
295
|
|
290
296
|
def set_points
|
291
297
|
@points = @points_count.times.map do |i|
|
292
|
-
data = NArray.
|
298
|
+
data = NArray.ref @points_matrix[true, i].flatten
|
293
299
|
Point.new(i, data, @labels[i])
|
294
300
|
end
|
295
301
|
end
|
296
302
|
|
297
303
|
def set_clusters
|
298
304
|
@clusters = @k.times.map do |i|
|
299
|
-
centroid = NArray.
|
305
|
+
centroid = NArray.ref @centroids[true, i].flatten
|
300
306
|
c = Cluster.new i, Point.new(-i, centroid)
|
301
307
|
@cluster_point_ids[i].each do |p|
|
302
308
|
c << @points[p]
|
@@ -322,17 +328,17 @@ class KMeansClusterer
|
|
322
328
|
end
|
323
329
|
|
324
330
|
def get_point i
|
325
|
-
NArray.
|
331
|
+
NArray.ref @points_matrix[true, i].flatten
|
326
332
|
end
|
327
333
|
|
328
334
|
def get_centroid i
|
329
|
-
NArray.
|
335
|
+
NArray.ref(@centroids[true, i].flatten)
|
330
336
|
end
|
331
337
|
|
332
338
|
def get_points_for_centroid i
|
333
339
|
point_ids = @cluster_point_ids[i]
|
334
340
|
points = @points_matrix[true, point_ids]
|
335
|
-
points.empty? ? NArray.sfloat(0) : NArray.
|
341
|
+
points.empty? ? NArray.sfloat(0) : NArray.ref(points)
|
336
342
|
end
|
337
343
|
|
338
344
|
def distance x, y, yy = @row_norms
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|