kmeans-clusterer 0.11.1 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +11 -8
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 266be0ce63154b6d9dd3928ae7901b42795be4eb
4
- data.tar.gz: 4be98d5b084e2d27473ca3bee802b442d2b7690a
3
+ metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
4
+ data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
5
5
  SHA512:
6
- metadata.gz: 4afc940658e70c7c0871fe3694d8e220f0048a66191b235ca179695c1366f61ce50b8793d5f107e95cd074110e97903ba3ff36f7116655b9cb047c94193ada6c
7
- data.tar.gz: 24a9be6db3a93a5a64c71f6820f62bcc96aa44adbfb35fd7007c882242e82edd4fec9151bc1bf7eaa13cd763f9a9170f382c2ffab6e226f1850d631105871045
6
+ metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
7
+ data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
data/lib/kmeans-clusterer.rb CHANGED
@@ -255,13 +255,15 @@ class KMeansClusterer
255
255
  def silhouette
256
256
  return 1.0 if @k < 2
257
257
 
258
+ # calculate all point-to-point distances at once
259
+ # uses more memory, but much faster
260
+ point_distances = Distance.euclidean @data, @data
261
+
258
262
  scores = @points.map do |point|
259
263
  sort_index = point.centroid_distances.sort_index
260
- c1_points = get_points_for_cluster sort_index[0]
261
- c2_points = get_points_for_cluster sort_index[1]
262
-
263
- a = dissimilarity(c1_points, point.data)
264
- b = dissimilarity(c2_points, point.data)
264
+ c1, c2 = sort_index[0], sort_index[1]
265
+ a = dissimilarity point.id, c1, point_distances
266
+ b = dissimilarity point.id, c2, point_distances
265
267
  (b - a) / [a,b].max
266
268
  end
267
269
 
@@ -274,9 +276,10 @@ class KMeansClusterer
274
276
 
275
277
  private
276
278
 
277
- def dissimilarity points, point
278
- distances = Distance.euclidean points, point
279
- distances.sum / distances.length.to_f
279
+ def dissimilarity point_id, cluster_id, point_distances
280
+ cluster_point_ids = @cluster_assigns.eq(cluster_id).where
281
+ cluster_point_distances = point_distances[cluster_point_ids, point_id]
282
+ cluster_point_distances.sum / cluster_point_distances.length
280
283
  end
281
284
 
282
285
  def init_centroids
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ version: 0.11.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-02 00:00:00.000000000 Z
11
+ date: 2015-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: narray