kmeans-clusterer 0.11.1 → 0.11.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +11 -8
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 266be0ce63154b6d9dd3928ae7901b42795be4eb
4
- data.tar.gz: 4be98d5b084e2d27473ca3bee802b442d2b7690a
3
+ metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
4
+ data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
5
5
  SHA512:
6
- metadata.gz: 4afc940658e70c7c0871fe3694d8e220f0048a66191b235ca179695c1366f61ce50b8793d5f107e95cd074110e97903ba3ff36f7116655b9cb047c94193ada6c
7
- data.tar.gz: 24a9be6db3a93a5a64c71f6820f62bcc96aa44adbfb35fd7007c882242e82edd4fec9151bc1bf7eaa13cd763f9a9170f382c2ffab6e226f1850d631105871045
6
+ metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
7
+ data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
data/lib/kmeans-clusterer.rb CHANGED
@@ -255,13 +255,15 @@ class KMeansClusterer
255
255
  def silhouette
256
256
  return 1.0 if @k < 2
257
257
 
258
+ # calculate all point-to-point distances at once
259
+ # uses more memory, but much faster
260
+ point_distances = Distance.euclidean @data, @data
261
+
258
262
  scores = @points.map do |point|
259
263
  sort_index = point.centroid_distances.sort_index
260
- c1_points = get_points_for_cluster sort_index[0]
261
- c2_points = get_points_for_cluster sort_index[1]
262
-
263
- a = dissimilarity(c1_points, point.data)
264
- b = dissimilarity(c2_points, point.data)
264
+ c1, c2 = sort_index[0], sort_index[1]
265
+ a = dissimilarity point.id, c1, point_distances
266
+ b = dissimilarity point.id, c2, point_distances
265
267
  (b - a) / [a,b].max
266
268
  end
267
269
 
@@ -274,9 +276,10 @@ class KMeansClusterer
274
276
 
275
277
  private
276
278
 
277
- def dissimilarity points, point
278
- distances = Distance.euclidean points, point
279
- distances.sum / distances.length.to_f
279
+ def dissimilarity point_id, cluster_id, point_distances
280
+ cluster_point_ids = @cluster_assigns.eq(cluster_id).where
281
+ cluster_point_distances = point_distances[cluster_point_ids, point_id]
282
+ cluster_point_distances.sum / cluster_point_distances.length
280
283
  end
281
284
 
282
285
  def init_centroids
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-02 00:00:00.000000000 Z
11
+ date: 2015-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray