kmeans-clusterer 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +16 -10
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef3362502d8b947a79154f0c38f00941399f3192
|
4
|
+
data.tar.gz: 28d240da6a683201cf5f36c13cac2df5c21c31d8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a2ee8ee21cd05b6ad0cc16e6db453fdd98bbba71506160b8396d5deff844a3ef7319e20895da191d7bd2562c69c407ee491fa4371c796acc6f2a8082b2ee171
|
7
|
+
data.tar.gz: 98ccfaeb50eb586145853d5da8bffa8b72447c6621431a9663493aaa67867119d8242837ad365cdc6e64e1fb8f3ea5c3b46b5e76d1af577bb2f5effe5465dd3a
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -15,11 +15,11 @@ class KMeansClusterer
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.scale data, mean = nil, std = nil, typecode = nil
|
18
|
-
data = NArray.
|
18
|
+
data = NArray.ref(data)
|
19
19
|
mean ||= self.mean(data)
|
20
20
|
std ||= self.std(data)
|
21
21
|
data = (data - mean) / std
|
22
|
-
[data, mean, std]
|
22
|
+
[NMatrix.ref(data), mean, std]
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
@@ -77,13 +77,15 @@ class KMeansClusterer
|
|
77
77
|
opts[:k] = k
|
78
78
|
opts[:typecode] = TYPECODE[opts[:float_precision]]
|
79
79
|
|
80
|
+
data = NMatrix.cast data, opts[:typecode]
|
81
|
+
|
80
82
|
if opts[:scale_data]
|
81
83
|
data, mean, std = Scaler.scale(data, nil, nil, opts[:typecode])
|
82
84
|
opts[:mean] = mean
|
83
85
|
opts[:std] = std
|
84
86
|
end
|
85
87
|
|
86
|
-
opts[:points_matrix] =
|
88
|
+
opts[:points_matrix] = data
|
87
89
|
opts[:row_norms] = opts[:points_matrix].map {|v| v**2}.sum(0)
|
88
90
|
|
89
91
|
bestrun = nil
|
@@ -144,7 +146,7 @@ class KMeansClusterer
|
|
144
146
|
updated_centroids = []
|
145
147
|
|
146
148
|
@k.times do |i|
|
147
|
-
centroid = NArray.
|
149
|
+
centroid = NArray.ref(@centroids[true, i].flatten)
|
148
150
|
point_ids = @cluster_point_ids[i]
|
149
151
|
|
150
152
|
if point_ids.empty?
|
@@ -179,8 +181,8 @@ class KMeansClusterer
|
|
179
181
|
end
|
180
182
|
|
181
183
|
def predict data
|
182
|
-
data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
|
183
184
|
data = NMatrix.cast(data, @typecode)
|
185
|
+
data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
|
184
186
|
distances = distance(@centroids, data, nil)
|
185
187
|
data.shape[1].times.map do |i|
|
186
188
|
distances[i, true].sort_index[0] # index of closest cluster
|
@@ -220,6 +222,10 @@ class KMeansClusterer
|
|
220
222
|
|
221
223
|
alias_method :silhouette_score, :silhouette
|
222
224
|
|
225
|
+
def inspect
|
226
|
+
%{#<#{self.class.name} k:#{@k} iterations:#{@iterations} error:#{@error} runtime:#{@runtime}>}
|
227
|
+
end
|
228
|
+
|
223
229
|
private
|
224
230
|
def wrap_point point
|
225
231
|
return point if point.is_a?(Point)
|
@@ -289,14 +295,14 @@ class KMeansClusterer
|
|
289
295
|
|
290
296
|
def set_points
|
291
297
|
@points = @points_count.times.map do |i|
|
292
|
-
data = NArray.
|
298
|
+
data = NArray.ref @points_matrix[true, i].flatten
|
293
299
|
Point.new(i, data, @labels[i])
|
294
300
|
end
|
295
301
|
end
|
296
302
|
|
297
303
|
def set_clusters
|
298
304
|
@clusters = @k.times.map do |i|
|
299
|
-
centroid = NArray.
|
305
|
+
centroid = NArray.ref @centroids[true, i].flatten
|
300
306
|
c = Cluster.new i, Point.new(-i, centroid)
|
301
307
|
@cluster_point_ids[i].each do |p|
|
302
308
|
c << @points[p]
|
@@ -322,17 +328,17 @@ class KMeansClusterer
|
|
322
328
|
end
|
323
329
|
|
324
330
|
def get_point i
|
325
|
-
NArray.
|
331
|
+
NArray.ref @points_matrix[true, i].flatten
|
326
332
|
end
|
327
333
|
|
328
334
|
def get_centroid i
|
329
|
-
NArray.
|
335
|
+
NArray.ref(@centroids[true, i].flatten)
|
330
336
|
end
|
331
337
|
|
332
338
|
def get_points_for_centroid i
|
333
339
|
point_ids = @cluster_point_ids[i]
|
334
340
|
points = @points_matrix[true, point_ids]
|
335
|
-
points.empty? ? NArray.sfloat(0) : NArray.
|
341
|
+
points.empty? ? NArray.sfloat(0) : NArray.ref(points)
|
336
342
|
end
|
337
343
|
|
338
344
|
def distance x, y, yy = @row_norms
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|