RubyGems - kmeans-clusterer - Versions diffs - 0.7.0 → 0.8.0 - Mend

kmeans-clusterer 0.7.0 → 0.8.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (3) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 280cf2639965cde59fcff41909375d284637825d
-  data.tar.gz: e9d9fc0db3828b9bfc37def487200cad77a4d5a7
+  metadata.gz: 38086107da4ad5ec21f6daa33166b4d8542f3a59
+  data.tar.gz: f7fed19b6df6b6c9dbfb21548bc07bd1d997b56e
 SHA512:
-  metadata.gz: 7cd63b7d8f844ea17972ce535d4f0b1ea29d61524351431ada328fdbf30736006739ced8f9c1b344f834c0c993f6491ef203f22761f30ef224795a6d38ee4996
-  data.tar.gz: 63ed5fa36fe27785c124877ac9a56d30190f55aec1a08c6ab161f79a24980f91f553c552f7956fed7d3f9bc25b4d1cbcb98df97d06e51a56c17eb5d290381019
+  metadata.gz: 2e4603c448b30875ba49eb694974ca5e43c0f4270d9d7005608f9bad086eff1bab769f23bb92a8756a024f037a1cc03dd568cb02122d7841c3a28b2556571893
+  data.tar.gz: 40ed7fcf8c7db56d7abf50c072cb7b928c501951e3dbf8a3d2276508c4a5ce2bae2b2bb12159c54ce999cd322b721f12f3b1cf3b46f7903aabc79dbf03f8ada4

data/lib/kmeans-clusterer.rb CHANGED Viewed

@@ -23,6 +23,22 @@ class KMeansClusterer
     end
   end
+  module Distance
+    def self.euclidean x, y, yy = nil
+      if x.is_a?(NMatrix) && y.is_a?(NMatrix)
+        xx = x.map {|v| v**2}.sum(0)
+        yy ||= y.map {|v| v**2}.sum(0)
+        xy = x * y.transpose
+        distance = xy * -2
+        distance += xx
+        distance += yy.transpose
+        NMath.sqrt distance
+      else
+        NMath.sqrt ((x - y)**2).sum(0)
+      end
+    end
+  end
   class Point
     attr_reader :id, :data
@@ -66,6 +82,14 @@ class KMeansClusterer
       point.cluster = self
       @points << point
     end
+    def sorted_points point = @centroid
+      point = point.data if point.is_a?(Point)
+      point = NArray.cast(point, @centroid.typecode) unless point.is_a?(NArray)
+      points_data = NArray.cast(@points.map(&:data))
+      distances = Distance.euclidean(points_data, point)
+      @points.sort_by.with_index {|p, i| distances[i] }
+    end
   end
@@ -108,7 +132,7 @@ class KMeansClusterer
   end
-  attr_reader :k, :points, :clusters, :error, :mean, :std, :iterations, :runtime
+  attr_reader :k, :points, :clusters, :centroids, :error, :mean, :std, :iterations, :runtime
   def initialize opts = {}
@@ -136,7 +160,7 @@ class KMeansClusterer
     loop do
       @iterations +=1
-      distances = distance(@centroids, @points_matrix)
+      distances = Distance.euclidean(@centroids, @points_matrix, @row_norms)
       # assign point ids to @cluster_point_ids
       @points_count.times do |i|
@@ -157,7 +181,7 @@ class KMeansClusterer
         else
           points = @points_matrix[true, point_ids]
           newcenter = points.mean(1)
-          moves << distance(centroid, newcenter)
+          moves << Distance.euclidean(centroid, newcenter)
         end
         updated_centroids << newcenter
@@ -185,27 +209,23 @@ class KMeansClusterer
   def predict data
     data = NMatrix.cast(data, @typecode)
     data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
-    distances = distance(@centroids, data, nil)
+    distances = Distance.euclidean(@centroids, data)
     data.shape[1].times.map do |i|
       distances[i, true].sort_index[0] # index of closest cluster
     end
   end
   def sorted_clusters point = origin
-    point = wrap_point point
-    centroids = get_cluster_centroids
-    distances = distance(centroids, point.data)
+    point = point.data if point.is_a?(Point)
+    point = NArray.cast(point, @typecode) unless point.is_a?(NArray)
+    distances = Distance.euclidean(NArray.ref(@centroids), point)
     @clusters.sort_by.with_index {|c, i| distances[i] }
   end
-  def origin
-    wrap_point Array.new(@points[0].dimension, 0)
-  end
   def silhouette
     return 1.0 if @k < 2
-    distances = distance(@centroids, @points_matrix)
+    distances = Distance.euclidean(@centroids, @points_matrix, @row_norms)
     scores = @points_count.times.map do |i|
       point = get_point i
@@ -222,20 +242,14 @@ class KMeansClusterer
     scores.reduce(:+) / scores.length # mean score for all points
   end
-  alias_method :silhouette_score, :silhouette
   def inspect
     %{#<#{self.class.name} k:#{@k} iterations:#{@iterations} error:#{@error} runtime:#{@runtime}>}
   end
   private
-    def wrap_point point
-      return point if point.is_a?(Point)
-      Point.new(0, NArray.cast(point, @typecode))
-    end
     def dissimilarity points, point
-      distances = distance points, point
+      distances = Distance.euclidean points, point
       distances.sum / distances.length.to_f
     end
@@ -259,7 +273,7 @@ class KMeansClusterer
       while centroid_ids.length < @k
         centroids = @points_matrix[true, centroid_ids]
-        distances = distance(centroids, @points_matrix)
+        distances = Distance.euclidean(centroids, @points_matrix, @row_norms)
         d2 = []
         @points_count.times do |i|
@@ -288,11 +302,7 @@ class KMeansClusterer
     end
     def pick_k_random_indexes
-      @points_count.times.to_a.shuffle.slice(0, @k)
-    end
-    def get_cluster_centroids
-      NArray.to_na @clusters.map {|c| c.centroid.data }
+      @points_count.times.to_a.sample @k
     end
     def set_points
@@ -321,7 +331,7 @@ class KMeansClusterer
         if points.empty?
           0
         else
-          distances = distance points, centroid
+          distances = Distance.euclidean points, centroid
           (distances**2).sum
         end
       end
@@ -343,17 +353,7 @@ class KMeansClusterer
       points.empty? ? NArray.sfloat(0) : NArray.ref(points)
     end
-    def distance x, y, yy = @row_norms
-      if x.is_a?(NMatrix) && y.is_a?(NMatrix)
-        xx = x.map {|v| v**2}.sum(0)
-        yy ||= y.map {|v| v**2}.sum(0)
-        xy = x * y.transpose
-        distance = xy * -2
-        distance += xx
-        distance += yy.transpose
-        NMath.sqrt distance
-      else
-        NMath.sqrt ((x - y)**2).sum(0)
-      end
+    def origin
+      Array.new(@points[0].dimension, 0)
     end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: kmeans-clusterer
 version: !ruby/object:Gem::Version
-  version: 0.7.0
+  version: 0.8.0
 platform: ruby
 authors:
 - Geoff Buesing
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-02-13 00:00:00.000000000 Z
+date: 2015-02-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: narray