RubyGems - kmeans-clusterer - Versions diffs - 0.11.2 → 0.11.3 - Mend

kmeans-clusterer 0.11.2 → 0.11.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
-  data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
+  metadata.gz: 07e426d9d9009e4ea9fc432f08b8390f8bb69bc6
+  data.tar.gz: 8cd3c31df1d33de334c63db296fe2d40f4c56eac
 SHA512:
-  metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
-  data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
+  metadata.gz: ffeb5fd9e2167411314674110cb618d5de43460fd32d01572bd400e7905fd7f94052624fa013c3d1309b08c10d5f1afc92b24e87ff9f4eb302c25639f53f3e32
+  data.tar.gz: a4e10c472ec5fc84b4a1444da6fadf5c246310a2aef0f701d58af7cd62231a9aac53fff0c3200ed48badfab5b35b117a9ac8be23aa986cfaa3636a37686fcf12

data/lib/kmeans-clusterer.rb CHANGED

@@ -260,10 +260,14 @@ class KMeansClusterer
     point_distances = Distance.euclidean @data, @data
     scores = @points.map do |point|
-      sort_index = point.centroid_distances.sort_index
-      c1, c2 = sort_index[0], sort_index[1]
-      a = dissimilarity point.id, c1, point_distances
-      b = dissimilarity point.id, c2, point_distances
+      dissimilarities = @clusters.map do |cluster|
+        dissimilarity(point.id, cluster.id, point_distances)
+      end
+      a = dissimilarities[point.cluster.id]
+      # set to Infinity so we can pick next closest via min()
+      dissimilarities[point.cluster.id] = Float::INFINITY
+      b = dissimilarities.min
       (b - a) / [a,b].max
     end
@@ -279,7 +283,7 @@ class KMeansClusterer
     def dissimilarity point_id, cluster_id, point_distances
       cluster_point_ids = @cluster_assigns.eq(cluster_id).where
       cluster_point_distances = point_distances[cluster_point_ids, point_id]
-      cluster_point_distances.sum / cluster_point_distances.length
+      cluster_point_distances.mean
     end
     def init_centroids
@@ -301,16 +305,11 @@ class KMeansClusterer
       while centroid_ids.length < @k
         centroids = @data[true, centroid_ids]
         distances = Distance.euclidean(centroids, @data, @row_norms)
+        # squared distances of each point to the nearest centroid
+        d2 = NArray.ref(distances.min(1).flatten)**2
-        d2 = []
-        @points_count.times do |i|
-          min_distance = distances[i, true].min
-          d2 << min_distance**2
-        end
-        d2 = NArray.cast(d2, @typecode)
         probs = d2 / d2.sum
         cumprobs = probs.cumsum
         r = rand
@@ -334,16 +333,6 @@ class KMeansClusterer
       @points_count.times.to_a.sample @k
     end
-    def get_centroid i
-      NArray.ref(@centroids[true, i].flatten)
-    end
-    def get_points_for_cluster i
-      point_ids = @cluster_assigns.eq(i).where
-      points = @data[true, point_ids]
-      points.empty? ? NArray.sfloat(0) : NArray.ref(points)
-    end
     def origin
       Array.new(@points[0].dimension, 0)
     end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: kmeans-clusterer
 version: !ruby/object:Gem::Version
-  version: 0.11.2
+  version: 0.11.3
 platform: ruby
 authors:
 - Geoff Buesing
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-04-03 00:00:00.000000000 Z
+date: 2015-04-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray