RubyGems - ckmeans - Versions diffs - 1.1.0 → 2.1.0 - Mend

ckmeans 1.1.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 508f78311a643e1fa8e693e4abf1cdf6df4eb06ff09756fa534ff4a514d0f34f
-  data.tar.gz: 4ef313387c2e45df4a8afde58e429093023a555a32f4af395a8b79c048a9d98d
+  metadata.gz: '08439cddf5410f952a06263d423297de219c86927dea2d0c4538916d9d0c70fd'
+  data.tar.gz: 94b1cece717f8538945208519c94046881f2d42c7ea9ba9bd453d063eef39878
 SHA512:
-  metadata.gz: ae0f1aff4bd6a78da04123d3728234012d0692ec22396b9529b245c8fa473343314508f053ee02ac876131b243704316948f476840d3c495d4e72eba68e095fd
-  data.tar.gz: ab95cfdacac4d9204887d4d5c5a7b85aafa3c869ec4b7a851ae994d8f15ddf096e99cb2d1691e6120a7a6bbe51cbde0524c93ad09b5811b7561c43c60a49256c
+  metadata.gz: 955d8b19a2a33d46b6adebfa05c9460784f5838d1a5d6d8c82e447be2eacb95da3a2711aebacf96429bf36fafe4d46dfc98fe87025cce4373c734c1cc6a60fb9
+  data.tar.gz: 8887346eb2602c071923ade05a549a0eb3df44491538fb79155bd766429dde0084847231e1a4f2dcf6e5e3c14f6dca309f2399339abe378becd4b21d8bc70aa7

data/README.md CHANGED Viewed

@@ -18,19 +18,52 @@ gem install ckmeans
 ## Usage
+### Basic Clustering
 ```rb
-# Fixed cluster count
-Ckmeans::Clusterer(data, kmin).clusters
-Ckmedian::Clusterer(data, kmin).clusters
+# Fixed cluster count (K known in advance)
+Ckmeans::Clusterer.new(data, 3).clusters
+Ckmedian::Clusterer.new(data, 3).clusters
+# Automatic K selection (tries K from kmin to kmax, picks optimal)
+Ckmeans::Clusterer.new(data, 1, 10).clusters
+Ckmedian::Clusterer.new(data, 1, 10).clusters
+```
+### Choosing Between Ckmeans and Ckmedian
-# Estimate optimal cluster count within kmin and kmax
-Ckmeans::Clusterer(data, kmin, kmax).clusters
-Ckmedian::Clusterer(data, kmin, kmax).clusters
+- **Ckmeans** - Minimizes squared distances (L2). Good for normally distributed data.
+- **Ckmedian** - Minimizes absolute distances (L1). More robust to outliers and data bursts.
-# Adjust Bayesian Information Criteria favoring more smaller clusters (Ckmeans only)
-Ckmeans::Clusterer(data, kmin, kmax, :sensitive).clusters
+```rb
+# For clean numerical data
+temperatures = [20.1, 20.2, 25.5, 25.6, 30.1, 30.2]
+Ckmeans::Clusterer.new(temperatures, 1, 5).clusters
+# => [[20.1, 20.2], [25.5, 25.6], [30.1, 30.2]]
+# For data with outliers (e.g., photo timestamps with bursts)
+timestamps = photos.map(&:taken_at).map(&:to_i)
+Ckmedian::Clusterer.new(timestamps, 1, 20).clusters
 ```
+### Stable Estimation (Recommended for Edge Cases)
+By default, both algorithms use a fast heuristic for estimating K. For datasets with many duplicates, tight clusters, or outliers, use `:stable` for more robust estimation:
+```rb
+# Stable estimation (uses statistical mixture models)
+Ckmeans::Clusterer.new(data, 1, 10, :stable).clusters
+Ckmedian::Clusterer.new(data, 1, 10, :stable).clusters
+```
+**When to use `:stable`:**
+- Small to medium datasets (< 1000 points)
+- Many duplicate values
+- Clusters with very different sizes
+- Photo/event timeline clustering (bursts and gaps)
+**Expert users:** `:stable` is an alias for `:gmm` (Gaussian Mixture Model) in Ckmeans and `:lmm` (Laplace Mixture Model) in Ckmedian.
 ## License
 The gem is available as open source under the terms of the [LGPL v3 License](https://opensource.org/license/lgpl-3-0).

data/ext/ckmeans/extensions.c CHANGED Viewed

@@ -39,7 +39,6 @@ typedef struct State {
     uint32_t xcount;
     uint32_t kmin;
     uint32_t kmax;
-    bool     apply_deviation;
     Arena   *arena;
     VectorF *xsorted;
     MatrixF *cost;
@@ -56,6 +55,8 @@ typedef struct RowParams {
     uint32_t istep;
 } RowParams;
+typedef uint32_t (FnFindKOptimal)(State);
 typedef struct {
     LDouble mean;
     LDouble variance;
@@ -63,7 +64,7 @@ typedef struct {
 VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
 VALUE rb_ckmedian_sorted_group_sizes(VALUE self);
-VALUE rb_sorted_group_sizes(VALUE self, FnDissim*);
+VALUE rb_sorted_group_sizes(VALUE self, FnDissim*, FnFindKOptimal*);
 Arena *arena_create(size_t);
 void  *arena_alloc(Arena*, size_t);
@@ -99,8 +100,9 @@ VectorI      *prune_candidates(State, RowParams, VectorI*);
 void         fill_even_positions(State, RowParams, VectorI*);
 SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
 VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
-uint32_t     find_koptimal(State);
+uint32_t     find_koptimal_fast(State);
+uint32_t     find_koptimal_gmm(State);
+uint32_t     find_koptimal_lmm(State);
 void Init_extensions(void) {
     VALUE ckmeans_module     = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
@@ -118,23 +120,26 @@ void Init_extensions(void) {
 VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
 {
-    return rb_sorted_group_sizes(self, dissimilarity_l2);
+    bool use_gmm = RTEST(rb_iv_get(self, "@use_gmm"));
+    FnFindKOptimal *find_k = use_gmm ? find_koptimal_gmm : find_koptimal_fast;
+    return rb_sorted_group_sizes(self, dissimilarity_l2, find_k);
 }
 VALUE rb_ckmedian_sorted_group_sizes(VALUE self)
 {
-    return rb_sorted_group_sizes(self, dissimilarity_l1);
+    bool use_lmm = RTEST(rb_iv_get(self, "@use_lmm"));
+    FnFindKOptimal *find_k = use_lmm ? find_koptimal_lmm : find_koptimal_fast;
+    return rb_sorted_group_sizes(self, dissimilarity_l1, find_k);
 }
-VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
+VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria, FnFindKOptimal *find_koptimal)
 {
-    uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
-    uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
-    uint32_t kmax        = NUM2UINT(rb_iv_get(self, "@kmax"));
-    bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
-    VALUE rb_xsorted     = rb_iv_get(self, "@xsorted");
-    size_t capacity      = sizeof(LDouble) * (xcount + 1) * (kmax + 1) * ALLOCATION_FACTOR + ARENA_MIN_CAPACITY;
-    Arena *arena         = arena_create(capacity);
+    uint32_t xcount  = NUM2UINT(rb_iv_get(self, "@xcount"));
+    uint32_t kmin    = NUM2UINT(rb_iv_get(self, "@kmin"));
+    uint32_t kmax    = NUM2UINT(rb_iv_get(self, "@kmax"));
+    VALUE rb_xsorted = rb_iv_get(self, "@xsorted");
+    size_t capacity  = sizeof(LDouble) * (xcount + 2) * (kmax + 2) * ALLOCATION_FACTOR + ARENA_MIN_CAPACITY;
+    Arena *arena     = arena_create(capacity);
     if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
@@ -150,17 +155,16 @@ VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
     }
     State state = {
-        .arena           = arena,
-        .xcount          = xcount,
-        .kmin            = kmin,
-        .kmax            = kmax,
-        .apply_deviation = apply_deviation,
-        .xsorted         = xsorted,
-        .cost            = cost,
-        .splits          = splits,
-        .xsum            = xsum,
-        .xsumsq          = xsumsq,
-        .dissim          = criteria
+        .arena   = arena,
+        .xcount  = xcount,
+        .kmin    = kmin,
+        .kmax    = kmax,
+        .xsorted = xsorted,
+        .cost    = cost,
+        .splits  = splits,
+        .xsum    = xsum,
+        .xsumsq  = xsumsq,
+        .dissim  = criteria
     };
@@ -209,7 +213,7 @@ VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
     return response;
 }
-uint32_t find_koptimal(State state)
+uint32_t find_koptimal_fast(State state)
 {
     uint32_t kmin       = state.kmin;
     uint32_t kmax       = state.kmax;
@@ -256,8 +260,7 @@ uint32_t find_koptimal(State state)
                     loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
                 }
                 loglikelihood += npoints * (
-                    (state.apply_deviation ? 0.0 : log(npoints / (LDouble) xcount)) -
-                    (0.5 * log(PIx2 * variance))
+                    log(npoints / (LDouble) xcount) - (0.5 * log(PIx2 * variance))
                 );
             } else {
                 loglikelihood += npoints * log(1.0 / bin_width / xcount);
@@ -280,6 +283,214 @@ uint32_t find_koptimal(State state)
     return kopt;
 }
+uint32_t find_koptimal_gmm(State state)
+{
+    uint32_t kmin = state.kmin;
+    uint32_t kmax = state.kmax;
+    uint32_t xcount = state.xcount;
+    if (kmin > kmax || xcount < 2) {
+        return (kmin < kmax) ? kmin : kmax;
+    }
+    Arena *arena       = state.arena;
+    VectorF *xsorted   = state.xsorted;
+    uint32_t kopt      = kmin;
+    LDouble max_bic    = 0.0;
+    LDouble log_xcount = log((LDouble) xcount);
+    VectorF *lambda    = vector_create_f(arena, kmax);
+    VectorF *mu        = vector_create_f(arena, kmax);
+    VectorF *sigma2    = vector_create_f(arena, kmax);
+    VectorF *coeff     = vector_create_f(arena, kmax);
+    VectorI *sizes     = vector_create_i(arena, kmax);
+    for (uint32_t kouter = kmin; kouter <= kmax; ++kouter)
+    {
+        uint32_t ileft = 0;
+        uint32_t iright;
+        backtrack_sizes(state, sizes, kouter);
+        for (uint32_t k = 0; k < kouter; ++k)
+        {
+            uint32_t size = vector_get_i(sizes, k);
+            vector_set_f(lambda, k, size / (LDouble) xcount);
+            iright = ileft + size - 1;
+            SegmentStats stats = shifted_data_variance(xsorted, ileft, iright);
+            vector_set_f(mu, k, stats.mean);
+            vector_set_f(sigma2, k, stats.variance);
+            if (stats.variance == 0 || size == 1) {
+                LDouble dmin;
+                if (ileft > 0 && iright < xcount - 1) {
+                    LDouble left_diff = vector_get_diff_f(xsorted, ileft, ileft - 1);
+                    LDouble right_diff = vector_get_diff_f(xsorted, iright + 1, iright);
+                    dmin = (left_diff < right_diff) ? left_diff : right_diff;
+                } else if (ileft > 0) {
+                    dmin = vector_get_diff_f(xsorted, ileft, ileft - 1);
+                } else {
+                    dmin = vector_get_diff_f(xsorted, iright + 1, iright);
+                }
+                if (stats.variance == 0) vector_set_f(sigma2, k, dmin * dmin / 4.0 / 9.0);
+                if (size == 1)  vector_set_f(sigma2, k, dmin * dmin);
+            }
+            LDouble lambda_k = vector_get_f(lambda, k);
+            LDouble sigma2_k = vector_get_f(sigma2, k);
+            vector_set_f(coeff, k, lambda_k / sqrt(PIx2 * sigma2_k));
+            ileft = iright + 1;
+        }
+        LDouble loglikelihood = 0.0;
+        for (uint32_t i = 0; i < xcount; ++i)
+        {
+            LDouble L  = 0.0;
+            LDouble xi = vector_get_f(xsorted, i);
+            for (uint32_t k = 0; k < kouter; ++k)
+            {
+                LDouble coeff_k   = vector_get_f(coeff, k);
+                LDouble mu_k      = vector_get_f(mu, k);
+                LDouble sigma2_k  = vector_get_f(sigma2, k);
+                LDouble x_mu_diff = xi - mu_k;
+                L                += coeff_k * exp(- x_mu_diff * x_mu_diff / (2.0 * sigma2_k));
+            }
+            loglikelihood += log(L);
+        }
+        LDouble bic = 2 * loglikelihood - (3 * kouter - 1) * log_xcount;
+        if (kouter == kmin) {
+            max_bic = bic;
+            kopt = kmin;
+        } else {
+            if (bic > max_bic) {
+                max_bic = bic;
+                kopt = kouter;
+            }
+        }
+    }
+    return kopt;
+}
+uint32_t find_koptimal_lmm(State state)
+{
+    uint32_t kmin = state.kmin;
+    uint32_t kmax = state.kmax;
+    uint32_t xcount = state.xcount;
+    if (kmin > kmax || xcount < 2) {
+        return (kmin < kmax) ? kmin : kmax;
+    }
+    Arena *arena       = state.arena;
+    VectorF *xsorted   = state.xsorted;
+    uint32_t kopt      = kmin;
+    LDouble max_bic    = 0.0;
+    LDouble log_xcount = log((LDouble) xcount);
+    VectorF *lambda    = vector_create_f(arena, kmax);
+    VectorF *mu        = vector_create_f(arena, kmax);  /* median */
+    VectorF *scale     = vector_create_f(arena, kmax);  /* MAD (mean absolute deviation) */
+    VectorF *coeff     = vector_create_f(arena, kmax);
+    VectorI *sizes     = vector_create_i(arena, kmax);
+    for (uint32_t kouter = kmin; kouter <= kmax; ++kouter)
+    {
+        uint32_t ileft = 0;
+        uint32_t iright;
+        backtrack_sizes(state, sizes, kouter);
+        for (uint32_t k = 0; k < kouter; ++k)
+        {
+            uint32_t size = vector_get_i(sizes, k);
+            vector_set_f(lambda, k, size / (LDouble) xcount);
+            iright = ileft + size - 1;
+            uint32_t median_idx = (ileft + iright) / 2;
+            LDouble median;
+            if ((size % 2) == 1) {
+                median = vector_get_f(xsorted, median_idx);
+            } else {
+                median = (vector_get_f(xsorted, median_idx) + vector_get_f(xsorted, median_idx + 1)) / 2.0;
+            }
+            vector_set_f(mu, k, median);
+            LDouble mad = 0.0;
+            for (uint32_t i = ileft; i <= iright; ++i) {
+                LDouble xi = vector_get_f(xsorted, i);
+                mad += fabs(xi - median);
+            }
+            mad = mad / size;
+            vector_set_f(scale, k, mad);
+            /* Handle edge case: MAD = 0 (all points are the same) or size = 1 */
+            if (mad == 0 || size == 1) {
+                LDouble dmin;
+                if (ileft > 0 && iright < xcount - 1) {
+                    LDouble left_diff = vector_get_diff_f(xsorted, ileft, ileft - 1);
+                    LDouble right_diff = vector_get_diff_f(xsorted, iright + 1, iright);
+                    dmin = (left_diff < right_diff) ? left_diff : right_diff;
+                } else if (ileft > 0) {
+                    dmin = vector_get_diff_f(xsorted, ileft, ileft - 1);
+                } else {
+                    dmin = vector_get_diff_f(xsorted, iright + 1, iright);
+                }
+                if (mad == 0) vector_set_f(scale, k, dmin / 6.0);
+                if (size == 1) vector_set_f(scale, k, dmin);
+            }
+            /* Laplace coefficient: lambda_k / (2 * b_k) */
+            LDouble lambda_k = vector_get_f(lambda, k);
+            LDouble scale_k  = vector_get_f(scale, k);
+            vector_set_f(coeff, k, lambda_k / (2.0 * scale_k));
+            ileft = iright + 1;
+        }
+        LDouble loglikelihood = 0.0;
+        for (uint32_t i = 0; i < xcount; ++i)
+        {
+            LDouble L  = 0.0;
+            LDouble xi = vector_get_f(xsorted, i);
+            for (uint32_t k = 0; k < kouter; ++k)
+            {
+                LDouble coeff_k  = vector_get_f(coeff, k);
+                LDouble mu_k     = vector_get_f(mu, k);
+                LDouble scale_k  = vector_get_f(scale, k);
+                LDouble x_mu_abs = fabs(xi - mu_k);
+                /* Laplace PDF: (1/(2b)) * exp(-|x-μ|/b) */
+                L               += coeff_k * exp(-x_mu_abs / scale_k);
+            }
+            loglikelihood += log(L);
+        }
+        /* BIC = 2*logL - (3k-1)*log(n) */
+        /* Parameters: k-1 mixing proportions + k medians + k scales = 3k-1 */
+        LDouble bic = 2 * loglikelihood - (3 * kouter - 1) * log_xcount;
+        if (kouter == kmin) {
+            max_bic = bic;
+            kopt = kmin;
+        } else {
+            if (bic > max_bic) {
+                max_bic = bic;
+                kopt = kouter;
+            }
+        }
+    }
+    return kopt;
+}
 VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
 {
     MatrixI *splits = state.splits;
@@ -287,12 +498,12 @@ VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
     uint32_t right  = xcount - 1;
     uint32_t left   = 0;
-    // Common case works with `i` remaining unsigned and unconditional assignment of the next `left` and `right`
+    /* Common case works with `i` remaining unsigned and unconditional assignment of the next `left` and `right` */
     for (uint32_t i = k - 1; i > 0; i--, right = left - 1) {
         left = matrix_get_i(splits, i, right);
         vector_set_i(sizes, i, right - left + 1);
     }
-    // Special case outside of the loop removing the need for conditionals
+    /* Special case outside of the loop removing the need for conditionals */
     left = matrix_get_i(splits, 0, right);
     vector_set_i(sizes, 0, right - left + 1);
@@ -416,12 +627,12 @@ inline void fill_even_positions(State state, RowParams rparams, VectorI *split_c
 inline void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
 {
-    const uint32_t row    = rparams.row;
-    const uint32_t imin   = rparams.imin;
-    const uint32_t imax   = rparams.imax;
-    const uint32_t istep  = rparams.istep;
-    MatrixF *const cost   = state.cost;
-    MatrixI *const splits = state.splits;
+    const uint32_t row     = rparams.row;
+    const uint32_t imin    = rparams.imin;
+    const uint32_t imax    = rparams.imax;
+    const uint32_t istep   = rparams.istep;
+    MatrixF *const cost    = state.cost;
+    MatrixI *const splits  = state.splits;
     FnDissim *const dissim = state.dissim;
     uint32_t optimal_split_idx_prev = 0;
@@ -723,7 +934,7 @@ Arena *arena_create(size_t capacity) {
 }
 void *arena_alloc(Arena *arena, size_t size) {
-    size = (size + 7) & ~7;
+    size = (size + 0xf) & ~0xf;
     if (arena->offset + size > arena->capacity) {
         rb_raise(rb_eNoMemError, "Arena Insufficient Capacity");

data/lib/ckmeans/clusterer.rb CHANGED Viewed

@@ -1,19 +1,37 @@
 # frozen_string_literal: true
 module Ckmeans
-  class Clusterer # rubocop:disable Style/Documentation
-    def initialize(entries, kmin, kmax = kmin, kestimate = :regular)
+  # Optimal k-means clustering for univariate (1D) data using dynamic programming.
+  # Minimizes within-cluster sum of squared distances (L2 norm).
+  class Clusterer
+    # Creates a new Ckmeans clusterer.
+    #
+    # @param entries [Array<Numeric>] The data points to cluster
+    # @param kmin [Integer] Minimum number of clusters to consider
+    # @param kmax [Integer] Maximum number of clusters to consider (defaults to kmin for fixed K)
+    # @param kestimate [Symbol] Method for estimating optimal K:
+    #   - :fast   - Quick heuristic using implicit Gaussian assumption (best for large datasets)
+    #   - :stable - Model-based estimation using Gaussian Mixture Model (better for duplicates/edge cases)
+    #   - :gmm    - Alias for :stable (Gaussian Mixture Model)
+    #
+    # @example Fixed number of clusters
+    #   Ckmeans::Clusterer.new([1, 2, 3, 100, 101], 2).clusters
+    #   # => [[1, 2, 3], [100, 101]]
+    #
+    # @example Automatic K selection with stable estimation
+    #   Ckmeans::Clusterer.new([1, 1, 1, 5, 5, 5, 10, 10, 10], 1, 5, :stable).clusters
+    def initialize(entries, kmin, kmax = kmin, kestimate = :fast)
       @xcount = entries.size
       raise ArgumentError, "Minimum cluster count is bigger than element count" if kmin > @xcount
       raise ArgumentError, "Maximum cluster count is bigger than element count" if kmax > @xcount
-      @kmin                = kmin
-      @unique_xcount       = entries.uniq.size
-      @kmax                = [@unique_xcount, kmax].min
-      @xsorted_original    = entries.sort
-      @xsorted             = @xsorted_original.map(&:to_f)
-      @apply_bic_deviation = kestimate == :sensitive
+      @kmin             = kmin
+      @unique_xcount    = entries.uniq.size
+      @kmax             = [@unique_xcount, kmax].min
+      @xsorted_original = entries.sort
+      @xsorted          = @xsorted_original.map(&:to_f)
+      @use_gmm          = %i[gmm stable].include?(kestimate)
     end
     def clusters

data/lib/ckmeans/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Ckmeans
-  VERSION = "1.1.0"
+  VERSION = "2.1.0"
 end

data/lib/ckmedian/clusterer.rb CHANGED Viewed

@@ -1,8 +1,28 @@
 # frozen_string_literal: true
 module Ckmedian
-  class Clusterer # rubocop:disable Style/Documentation
-    def initialize(entries, kmin, kmax = kmin)
+  # Optimal k-median clustering for univariate (1D) data using dynamic programming.
+  # Minimizes within-cluster sum of absolute deviations (L1 norm).
+  # More robust to outliers than k-means.
+  class Clusterer
+    # Creates a new Ckmedian clusterer.
+    #
+    # @param entries [Array<Numeric>] The data points to cluster
+    # @param kmin [Integer] Minimum number of clusters to consider
+    # @param kmax [Integer] Maximum number of clusters to consider (defaults to kmin for fixed K)
+    # @param kestimate [Symbol] Method for estimating optimal K:
+    #   - :fast   - Quick heuristic using implicit Gaussian assumption (best for large datasets)
+    #   - :stable - Model-based estimation using Laplace Mixture Model (better for outliers/bursts)
+    #   - :lmm    - Alias for :stable (Laplace Mixture Model)
+    #
+    # @example Fixed number of clusters
+    #   Ckmedian::Clusterer.new([1, 2, 3, 100, 101], 2).clusters
+    #   # => [[1, 2, 3], [100, 101]]
+    #
+    # @example Photo timeline clustering (robust to bursts and outliers)
+    #   timestamps = photos.map(&:taken_at).map(&:to_i)
+    #   Ckmedian::Clusterer.new(timestamps, 1, 20, :stable).clusters
+    def initialize(entries, kmin, kmax = kmin, kestimate = :fast)
       @xcount = entries.size
       raise ArgumentError, "Minimum cluster count is bigger than element count" if kmin > @xcount
@@ -13,6 +33,7 @@ module Ckmedian
       @kmax             = [@unique_xcount, kmax].min
       @xsorted_original = entries.sort
       @xsorted          = @xsorted_original.map(&:to_f)
+      @use_lmm          = %i[lmm stable].include?(kestimate)
     end
     def clusters

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ckmeans
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 2.1.0
 platform: ruby
 authors:
 - Vlad Lebedev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2025-05-23 00:00:00.000000000 Z
+date: 2025-12-14 00:00:00.000000000 Z
 dependencies: []
 description: Repeatable clustering of unidimensional data
 email: