kmeans-clusterer 0.11.2 → 0.11.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +12 -23
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07e426d9d9009e4ea9fc432f08b8390f8bb69bc6
|
4
|
+
data.tar.gz: 8cd3c31df1d33de334c63db296fe2d40f4c56eac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffeb5fd9e2167411314674110cb618d5de43460fd32d01572bd400e7905fd7f94052624fa013c3d1309b08c10d5f1afc92b24e87ff9f4eb302c25639f53f3e32
|
7
|
+
data.tar.gz: a4e10c472ec5fc84b4a1444da6fadf5c246310a2aef0f701d58af7cd62231a9aac53fff0c3200ed48badfab5b35b117a9ac8be23aa986cfaa3636a37686fcf12
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -260,10 +260,14 @@ class KMeansClusterer
|
|
260
260
|
point_distances = Distance.euclidean @data, @data
|
261
261
|
|
262
262
|
scores = @points.map do |point|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
263
|
+
dissimilarities = @clusters.map do |cluster|
|
264
|
+
dissimilarity(point.id, cluster.id, point_distances)
|
265
|
+
end
|
266
|
+
a = dissimilarities[point.cluster.id]
|
267
|
+
# set to Infinity so we can pick next closest via min()
|
268
|
+
dissimilarities[point.cluster.id] = Float::INFINITY
|
269
|
+
b = dissimilarities.min
|
270
|
+
|
267
271
|
(b - a) / [a,b].max
|
268
272
|
end
|
269
273
|
|
@@ -279,7 +283,7 @@ class KMeansClusterer
|
|
279
283
|
def dissimilarity point_id, cluster_id, point_distances
|
280
284
|
cluster_point_ids = @cluster_assigns.eq(cluster_id).where
|
281
285
|
cluster_point_distances = point_distances[cluster_point_ids, point_id]
|
282
|
-
cluster_point_distances.
|
286
|
+
cluster_point_distances.mean
|
283
287
|
end
|
284
288
|
|
285
289
|
def init_centroids
|
@@ -301,16 +305,11 @@ class KMeansClusterer
|
|
301
305
|
|
302
306
|
while centroid_ids.length < @k
|
303
307
|
centroids = @data[true, centroid_ids]
|
304
|
-
|
305
308
|
distances = Distance.euclidean(centroids, @data, @row_norms)
|
309
|
+
|
310
|
+
# squared distances of each point to the nearest centroid
|
311
|
+
d2 = NArray.ref(distances.min(1).flatten)**2
|
306
312
|
|
307
|
-
d2 = []
|
308
|
-
@points_count.times do |i|
|
309
|
-
min_distance = distances[i, true].min
|
310
|
-
d2 << min_distance**2
|
311
|
-
end
|
312
|
-
|
313
|
-
d2 = NArray.cast(d2, @typecode)
|
314
313
|
probs = d2 / d2.sum
|
315
314
|
cumprobs = probs.cumsum
|
316
315
|
r = rand
|
@@ -334,16 +333,6 @@ class KMeansClusterer
|
|
334
333
|
@points_count.times.to_a.sample @k
|
335
334
|
end
|
336
335
|
|
337
|
-
def get_centroid i
|
338
|
-
NArray.ref(@centroids[true, i].flatten)
|
339
|
-
end
|
340
|
-
|
341
|
-
def get_points_for_cluster i
|
342
|
-
point_ids = @cluster_assigns.eq(i).where
|
343
|
-
points = @data[true, point_ids]
|
344
|
-
points.empty? ? NArray.sfloat(0) : NArray.ref(points)
|
345
|
-
end
|
346
|
-
|
347
336
|
def origin
|
348
337
|
Array.new(@points[0].dimension, 0)
|
349
338
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.2
|
4
|
+
version: 0.11.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|