kmeans-clusterer 0.11.2 → 0.11.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +12 -23
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
4
- data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
3
+ metadata.gz: 07e426d9d9009e4ea9fc432f08b8390f8bb69bc6
4
+ data.tar.gz: 8cd3c31df1d33de334c63db296fe2d40f4c56eac
5
5
  SHA512:
6
- metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
7
- data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
6
+ metadata.gz: ffeb5fd9e2167411314674110cb618d5de43460fd32d01572bd400e7905fd7f94052624fa013c3d1309b08c10d5f1afc92b24e87ff9f4eb302c25639f53f3e32
7
+ data.tar.gz: a4e10c472ec5fc84b4a1444da6fadf5c246310a2aef0f701d58af7cd62231a9aac53fff0c3200ed48badfab5b35b117a9ac8be23aa986cfaa3636a37686fcf12
data/lib/kmeans-clusterer.rb CHANGED
@@ -260,10 +260,14 @@ class KMeansClusterer
260
260
  point_distances = Distance.euclidean @data, @data
261
261
 
262
262
  scores = @points.map do |point|
263
- sort_index = point.centroid_distances.sort_index
264
- c1, c2 = sort_index[0], sort_index[1]
265
- a = dissimilarity point.id, c1, point_distances
266
- b = dissimilarity point.id, c2, point_distances
263
+ dissimilarities = @clusters.map do |cluster|
264
+ dissimilarity(point.id, cluster.id, point_distances)
265
+ end
266
+ a = dissimilarities[point.cluster.id]
267
+ # set to Infinity so we can pick next closest via min()
268
+ dissimilarities[point.cluster.id] = Float::INFINITY
269
+ b = dissimilarities.min
270
+
267
271
  (b - a) / [a,b].max
268
272
  end
269
273
 
@@ -279,7 +283,7 @@ class KMeansClusterer
279
283
  def dissimilarity point_id, cluster_id, point_distances
280
284
  cluster_point_ids = @cluster_assigns.eq(cluster_id).where
281
285
  cluster_point_distances = point_distances[cluster_point_ids, point_id]
282
- cluster_point_distances.sum / cluster_point_distances.length
286
+ cluster_point_distances.mean
283
287
  end
284
288
 
285
289
  def init_centroids
@@ -301,16 +305,11 @@ class KMeansClusterer
301
305
 
302
306
  while centroid_ids.length < @k
303
307
  centroids = @data[true, centroid_ids]
304
-
305
308
  distances = Distance.euclidean(centroids, @data, @row_norms)
309
+
310
+ # squared distances of each point to the nearest centroid
311
+ d2 = NArray.ref(distances.min(1).flatten)**2
306
312
 
307
- d2 = []
308
- @points_count.times do |i|
309
- min_distance = distances[i, true].min
310
- d2 << min_distance**2
311
- end
312
-
313
- d2 = NArray.cast(d2, @typecode)
314
313
  probs = d2 / d2.sum
315
314
  cumprobs = probs.cumsum
316
315
  r = rand
@@ -334,16 +333,6 @@ class KMeansClusterer
334
333
  @points_count.times.to_a.sample @k
335
334
  end
336
335
 
337
- def get_centroid i
338
- NArray.ref(@centroids[true, i].flatten)
339
- end
340
-
341
- def get_points_for_cluster i
342
- point_ids = @cluster_assigns.eq(i).where
343
- points = @data[true, point_ids]
344
- points.empty? ? NArray.sfloat(0) : NArray.ref(points)
345
- end
346
-
347
336
  def origin
348
337
  Array.new(@points[0].dimension, 0)
349
338
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.2
4
+ version: 0.11.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-03 00:00:00.000000000 Z
11
+ date: 2015-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray