RubyGems - rumale-clustering - Versions diffs - 0.24.0 → 0.26.0 - Mend

rumale-clustering 0.24.0 → 0.26.0

Files changed (6) hide show

checksums.yaml +4 -4
data/LICENSE.txt +1 -1
data/lib/rumale/clustering/mean_shift.rb +116 -0
data/lib/rumale/clustering/version.rb +1 -1
data/lib/rumale/clustering.rb +1 -0
metadata +5 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 26c8d431fa54beb0ef656cb5c058176ed8b777dcd1075d4ff859c37ca458ab98
-  data.tar.gz: e180764368160a0273fc42e92238beaa25e93ebbbee0766dfb9f0efed2bc80fe
+  metadata.gz: d4bd3dd2f04f44e145f7ebe90154dfd9e2970485c58cd68af82abd0f744e78c0
+  data.tar.gz: 9af9eafaa42f596f75c09a13b6da658bf7ceb69af657a22bc50f6c58b7a2eb05
 SHA512:
-  metadata.gz: e5386f87dbed2376c712b9f1e74484f757d0bd6e89b8d1c5455865405f4561ae22f4245863ecc06894202e3bea7373f97c767cd2e182172931eb58c18ee47220
-  data.tar.gz: 52e855b335ea4454850ffc2ab18a2c89c34849bb88f0b59af59073071d803e926c69638241f47d320f40aa06d945b105887e4e9d84ef453d404a43c1825470a5
+  metadata.gz: 2bcbe3e94d4ae65507fb6253b68264dcc42ed455a7150d7096ea71d4d838dae2de2ba2b7991e06b258b16e28f701f5c350b5e26a7c06f43a0b08d6e0145c5cb1
+  data.tar.gz: c08a182fd31b16aaad51186c4dfe3457e5ea7d5f9b956a966ead434a036d9817189cd3e5ee7be824a04c06baf60771423f9ef23c0488aceb1705dfde7aaeb8a4

data/LICENSE.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2022 Atsushi Tatsuma
+Copyright (c) 2022-2023 Atsushi Tatsuma
 All rights reserved.
 Redistribution and use in source and binary forms, with or without

data/lib/rumale/clustering/mean_shift.rb ADDED Viewed

@@ -0,0 +1,116 @@
+# frozen_string_literal: true
+require 'rumale/base/estimator'
+require 'rumale/base/cluster_analyzer'
+require 'rumale/pairwise_metric'
+require 'rumale/validation'
+module Rumale
+  module Clustering
+    # MeanShift is a class that implements mean-shift clustering with flat kernel.
+    #
+    # @example
+    #   require 'rumale/clustering/mean_shift'
+    #
+    #   analyzer = Rumale::Clustering::MeanShift.new(bandwidth: 1.5)
+    #   cluster_labels = analyzer.fit_predict(samples)
+    #
+    # *Reference*
+    # - Carreira-Perpinan, M A., "A review of mean-shift algorithms for clustering," arXiv:1503.00687v1.
+    # - Sheikh, Y A., Khan, E A., and Kanade, T., "Mode-seeking by Medoidshifts," Proc. ICCV'07, pp. 1--8, 2007.
+    # - Vedaldi, A., and Soatto, S., "Quick Shift and Kernel Methods for Mode Seeking," Proc. ECCV'08, pp. 705--718, 2008.
+    class MeanShift < Rumale::Base::Estimator
+      include Rumale::Base::ClusterAnalyzer
+      # Return the centroids.
+      # @return [Numo::DFloat] (shape: [n_clusters, n_features])
+      attr_reader :cluster_centers
+      # Create a new cluster analyzer with mean-shift algorithm.
+      #
+      # @param bandwidth [Float] The bandwidth parameter of flat kernel.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion
+      def initialize(bandwidth: 1.0, max_iter: 500, tol: 1e-4)
+        super()
+        @params = {
+          bandwidth: bandwidth,
+          max_iter: max_iter,
+          tol: tol
+        }
+      end
+      # Analysis clusters with given training data.
+      #
+      # @overload fit(x) -> MeanShift
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      #   @return [MeanShift] The learned cluster analyzer itself.
+      def fit(x, _y = nil)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        z = x.dup
+        @params[:max_iter].times do
+          distance_mat = Rumale::PairwiseMetric.euclidean_distance(x, z)
+          kernel_mat = Numo::DFloat.cast(distance_mat.le(@params[:bandwidth]))
+          sum_kernel = kernel_mat.sum(axis: 0)
+          weight_mat = kernel_mat.dot((1 / sum_kernel).diag)
+          updated = weight_mat.transpose.dot(x)
+          break if (z - updated).abs.sum(axis: 1).max <= @params[:tol]
+          z = updated
+        end
+        @cluster_centers = connect_components(z)
+        self
+      end
+      # Predict cluster labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def predict(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        assign_cluster(x)
+      end
+      # Analysis clusters and assign samples to clusters.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
+      def fit_predict(x)
+        x = Rumale::Validation.check_convert_sample_array(x)
+        fit(x).predict(x)
+      end
+      private
+      def assign_cluster(x)
+        n_clusters = @cluster_centers.shape[0]
+        distance_mat = Rumale::PairwiseMetric.squared_error(x, @cluster_centers)
+        distance_mat.min_index(axis: 1) - Numo::Int32[*0.step(distance_mat.size - 1, n_clusters)]
+      end
+      def connect_components(z)
+        centers = []
+        n_samples = z.shape[0]
+        n_samples.times do |idx|
+          assigned = false
+          centers.each do |cluster_vec|
+            dist = Math.sqrt(((z[idx, true] - cluster_vec)**2).sum.abs)
+            if dist <= @params[:bandwidth]
+              assigned = true
+              break
+            end
+          end
+          centers << z[idx, true].dup unless assigned
+        end
+        Numo::DFloat.asarray(centers)
+      end
+    end
+  end
+end

data/lib/rumale/clustering/version.rb CHANGED Viewed

@@ -5,6 +5,6 @@ module Rumale
   # This module consists of classes that implement cluster analysis methods.
   module Clustering
     # @!visibility private
-    VERSION = '0.24.0'
+    VERSION = '0.26.0'
   end
 end

data/lib/rumale/clustering.rb CHANGED Viewed

@@ -7,6 +7,7 @@ require_relative 'clustering/gaussian_mixture'
 require_relative 'clustering/hdbscan'
 require_relative 'clustering/k_means'
 require_relative 'clustering/k_medoids'
+require_relative 'clustering/mean_shift'
 require_relative 'clustering/mini_batch_k_means'
 require_relative 'clustering/power_iteration'
 require_relative 'clustering/single_linkage'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale-clustering
 version: !ruby/object:Gem::Version
-  version: 0.24.0
+  version: 0.26.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2022-12-31 00:00:00.000000000 Z
+date: 2023-02-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -30,14 +30,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.24.0
+        version: 0.26.0
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.24.0
+        version: 0.26.0
 description: |
   Rumale::Clustering provides cluster analysis algorithms,
   such as K-Means, Gaussian Mixture Model, DBSCAN, and Spectral Clustering,
@@ -56,6 +56,7 @@ files:
 - lib/rumale/clustering/hdbscan.rb
 - lib/rumale/clustering/k_means.rb
 - lib/rumale/clustering/k_medoids.rb
+- lib/rumale/clustering/mean_shift.rb
 - lib/rumale/clustering/mini_batch_k_means.rb
 - lib/rumale/clustering/power_iteration.rb
 - lib/rumale/clustering/single_linkage.rb