kmeans-clusterer 0.11.2 → 0.11.3
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +12 -23
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 07e426d9d9009e4ea9fc432f08b8390f8bb69bc6
|
4
|
+
data.tar.gz: 8cd3c31df1d33de334c63db296fe2d40f4c56eac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ffeb5fd9e2167411314674110cb618d5de43460fd32d01572bd400e7905fd7f94052624fa013c3d1309b08c10d5f1afc92b24e87ff9f4eb302c25639f53f3e32
|
7
|
+
data.tar.gz: a4e10c472ec5fc84b4a1444da6fadf5c246310a2aef0f701d58af7cd62231a9aac53fff0c3200ed48badfab5b35b117a9ac8be23aa986cfaa3636a37686fcf12
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -260,10 +260,14 @@ class KMeansClusterer
|
|
260
260
|
point_distances = Distance.euclidean @data, @data
|
261
261
|
|
262
262
|
scores = @points.map do |point|
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
263
|
+
dissimilarities = @clusters.map do |cluster|
|
264
|
+
dissimilarity(point.id, cluster.id, point_distances)
|
265
|
+
end
|
266
|
+
a = dissimilarities[point.cluster.id]
|
267
|
+
# set to Infinity so we can pick next closest via min()
|
268
|
+
dissimilarities[point.cluster.id] = Float::INFINITY
|
269
|
+
b = dissimilarities.min
|
270
|
+
|
267
271
|
(b - a) / [a,b].max
|
268
272
|
end
|
269
273
|
|
@@ -279,7 +283,7 @@ class KMeansClusterer
|
|
279
283
|
def dissimilarity point_id, cluster_id, point_distances
|
280
284
|
cluster_point_ids = @cluster_assigns.eq(cluster_id).where
|
281
285
|
cluster_point_distances = point_distances[cluster_point_ids, point_id]
|
282
|
-
cluster_point_distances.
|
286
|
+
cluster_point_distances.mean
|
283
287
|
end
|
284
288
|
|
285
289
|
def init_centroids
|
@@ -301,16 +305,11 @@ class KMeansClusterer
|
|
301
305
|
|
302
306
|
while centroid_ids.length < @k
|
303
307
|
centroids = @data[true, centroid_ids]
|
304
|
-
|
305
308
|
distances = Distance.euclidean(centroids, @data, @row_norms)
|
309
|
+
|
310
|
+
# squared distances of each point to the nearest centroid
|
311
|
+
d2 = NArray.ref(distances.min(1).flatten)**2
|
306
312
|
|
307
|
-
d2 = []
|
308
|
-
@points_count.times do |i|
|
309
|
-
min_distance = distances[i, true].min
|
310
|
-
d2 << min_distance**2
|
311
|
-
end
|
312
|
-
|
313
|
-
d2 = NArray.cast(d2, @typecode)
|
314
313
|
probs = d2 / d2.sum
|
315
314
|
cumprobs = probs.cumsum
|
316
315
|
r = rand
|
@@ -334,16 +333,6 @@ class KMeansClusterer
|
|
334
333
|
@points_count.times.to_a.sample @k
|
335
334
|
end
|
336
335
|
|
337
|
-
def get_centroid i
|
338
|
-
NArray.ref(@centroids[true, i].flatten)
|
339
|
-
end
|
340
|
-
|
341
|
-
def get_points_for_cluster i
|
342
|
-
point_ids = @cluster_assigns.eq(i).where
|
343
|
-
points = @data[true, point_ids]
|
344
|
-
points.empty? ? NArray.sfloat(0) : NArray.ref(points)
|
345
|
-
end
|
346
|
-
|
347
336
|
def origin
|
348
337
|
Array.new(@points[0].dimension, 0)
|
349
338
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.2
|
4
|
+
version: 0.11.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|