kmeans-clusterer 0.11.2 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +12 -23
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
4
- data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
3
+ metadata.gz: 07e426d9d9009e4ea9fc432f08b8390f8bb69bc6
4
+ data.tar.gz: 8cd3c31df1d33de334c63db296fe2d40f4c56eac
5
5
  SHA512:
6
- metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
7
- data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
6
+ metadata.gz: ffeb5fd9e2167411314674110cb618d5de43460fd32d01572bd400e7905fd7f94052624fa013c3d1309b08c10d5f1afc92b24e87ff9f4eb302c25639f53f3e32
7
+ data.tar.gz: a4e10c472ec5fc84b4a1444da6fadf5c246310a2aef0f701d58af7cd62231a9aac53fff0c3200ed48badfab5b35b117a9ac8be23aa986cfaa3636a37686fcf12
data/lib/kmeans-clusterer.rb CHANGED
@@ -260,10 +260,14 @@ class KMeansClusterer
260
260
  point_distances = Distance.euclidean @data, @data
261
261
 
262
262
  scores = @points.map do |point|
263
- sort_index = point.centroid_distances.sort_index
264
- c1, c2 = sort_index[0], sort_index[1]
265
- a = dissimilarity point.id, c1, point_distances
266
- b = dissimilarity point.id, c2, point_distances
263
+ dissimilarities = @clusters.map do |cluster|
264
+ dissimilarity(point.id, cluster.id, point_distances)
265
+ end
266
+ a = dissimilarities[point.cluster.id]
267
+ # set to Infinity so we can pick next closest via min()
268
+ dissimilarities[point.cluster.id] = Float::INFINITY
269
+ b = dissimilarities.min
270
+
267
271
  (b - a) / [a,b].max
268
272
  end
269
273
 
@@ -279,7 +283,7 @@ class KMeansClusterer
279
283
  def dissimilarity point_id, cluster_id, point_distances
280
284
  cluster_point_ids = @cluster_assigns.eq(cluster_id).where
281
285
  cluster_point_distances = point_distances[cluster_point_ids, point_id]
282
- cluster_point_distances.sum / cluster_point_distances.length
286
+ cluster_point_distances.mean
283
287
  end
284
288
 
285
289
  def init_centroids
@@ -301,16 +305,11 @@ class KMeansClusterer
301
305
 
302
306
  while centroid_ids.length < @k
303
307
  centroids = @data[true, centroid_ids]
304
-
305
308
  distances = Distance.euclidean(centroids, @data, @row_norms)
309
+
310
+ # squared distances of each point to the nearest centroid
311
+ d2 = NArray.ref(distances.min(1).flatten)**2
306
312
 
307
- d2 = []
308
- @points_count.times do |i|
309
- min_distance = distances[i, true].min
310
- d2 << min_distance**2
311
- end
312
-
313
- d2 = NArray.cast(d2, @typecode)
314
313
  probs = d2 / d2.sum
315
314
  cumprobs = probs.cumsum
316
315
  r = rand
@@ -334,16 +333,6 @@ class KMeansClusterer
334
333
  @points_count.times.to_a.sample @k
335
334
  end
336
335
 
337
- def get_centroid i
338
- NArray.ref(@centroids[true, i].flatten)
339
- end
340
-
341
- def get_points_for_cluster i
342
- point_ids = @cluster_assigns.eq(i).where
343
- points = @data[true, point_ids]
344
- points.empty? ? NArray.sfloat(0) : NArray.ref(points)
345
- end
346
-
347
336
  def origin
348
337
  Array.new(@points[0].dimension, 0)
349
338
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.2
4
+ version: 0.11.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-03 00:00:00.000000000 Z
11
+ date: 2015-04-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray