RubyGems - ckmeans - Versions diffs - 1.0.1 → 1.0.3 - Mend

ckmeans 1.0.1 → 1.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3646d5dfdbc85e6168c9b315a52d7b345432f1b964ab96f1dd18bd04c62f867c
-  data.tar.gz: 562d6aaeff0d81b1ff804886870ed09df1d0115867f9127b8a9b8283be638e11
+  metadata.gz: 5ad7e8c24dd367d5e6a6dd66abc529ae92079cf99d1c781a7646c929547b0e62
+  data.tar.gz: 2e338ca878eba2d250ca61fff2ea8bee44ec8387b37e12b31600edf9da2b7130
 SHA512:
-  metadata.gz: bca50713fcd779e6d2fb7bd37b44cd7ed16a4879736139f531a37f99666f4c797bae5b27b574e26fb6fffdd616ded97a397b29ac1d6e8e268e63eae073051de1
-  data.tar.gz: 167e1865fb1707b054a13f0dcabfcd8bde2ed19811886fb312a54d129b136a8c4663b961e220097fbede697f02a58c1e72de3ec0640c33aadba6b59e404c0acb
+  metadata.gz: 8c59e1e159cc9cada8afed9e016a5d8956cfe909bb7b7d82c8d155f388fdf1924a49072d37e52065fa643a539da3a192767eddb38da95b2c2524bcc7d0a39ebd
+  data.tar.gz: f2b535377d441bc1f2ee309a5466c8231b425aa0dd9b0512aa36257defa12b3b645694ae953b2b5e3b6997c50bde796e8fa1c2f8f10d4055b1cc9cb6abcf1353

data/.dockerignore ADDED Viewed

@@ -0,0 +1,13 @@
+tmp
+# Ignore compiled extension files
+*.bundle
+*.so
+*.o
+*.dll
+# Ignore generated makefiles and compilation artifacts
+ext/**/Makefile
+ext/**/mkmf.log
+ext/**/*.log
+ext/**/tmp/

data/.rubocop_todo.yml CHANGED Viewed

@@ -1,49 +1,13 @@
 # This configuration was generated by
 # `rubocop --auto-gen-config`
-# on 2025-04-17 07:09:28 UTC using RuboCop version 1.75.1.
+# on 2025-04-24 06:16:37 UTC using RuboCop version 1.75.1.
 # The point is for the user to remove these configuration records
 # one by one as the offenses are removed from the code base.
 # Note that changes in the inspected code, or installation of new
 # versions of RuboCop, may require this file to be generated again.
-# Offense count: 7
-# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
-Metrics/AbcSize:
-  Max: 95
-# Offense count: 2
+# Offense count: 1
 # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
 # AllowedMethods: refine
 Metrics/BlockLength:
-  Max: 41
-# Offense count: 3
-# Configuration parameters: AllowedMethods, AllowedPatterns.
-Metrics/CyclomaticComplexity:
-  Max: 10
-# Offense count: 6
-# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
-Metrics/MethodLength:
-  Max: 48
-# Offense count: 3
-# Configuration parameters: AllowedMethods, AllowedPatterns.
-Metrics/PerceivedComplexity:
-  Max: 13
-# Offense count: 12
-# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
-# AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
-Naming/MethodParameterName:
-  Exclude:
-    - 'lib/ckmeans/clusterer.rb'
-# Offense count: 5
-# This cop supports unsafe autocorrection (--autocorrect-all).
-# Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
-# SupportedStyles: predicate, comparison
-Style/NumericPredicate:
-  Exclude:
-    - 'spec/**/*'
-    - 'lib/ckmeans/clusterer.rb'
+  Max: 26

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,14 @@
 ## [Unreleased]
+## [1.0.1] - 2025-04-24
+- https://github.com/vlebedeff/rb-ckmeans/pull/9
+- https://github.com/vlebedeff/rb-ckmeans/pull/8
+## [1.0.0] - 2025-04-22
+- https://github.com/vlebedeff/rb-ckmeans/pull/6
 ## [0.1.2] - 2025-03-31
 - https://github.com/vlebedeff/rb-ckmeans/pull/3

data/Dockerfile ADDED Viewed

@@ -0,0 +1,11 @@
+FROM public.ecr.aws/docker/library/ruby:3.2.2
+RUN apt-get update && apt-get install -y build-essential ruby-dev
+RUN gem install bundler -v 2.6.5
+WORKDIR /opt/rb-ckmeans
+COPY . .
+RUN bundle install -j 12
+ENTRYPOINT ["bundle", "exec"]

data/ext/ckmeans/extensions.c CHANGED Viewed

@@ -1,18 +1,20 @@
 #include <stdio.h>
-#include <assert.h>
 #include <math.h>
+#include <string.h>
 #include "ruby.h"
+typedef long double LDouble;
 typedef struct Arena {
-    uint32_t capacity;
-    uint32_t offset;
+    size_t capacity;
+    size_t offset;
     uint8_t  *buffer;
 } Arena;
 typedef struct MatrixF {
     uint32_t ncols;
     uint32_t nrows;
-    long double *values;
+    LDouble *values;
 } MatrixF;
 typedef struct MatrixI {
@@ -22,12 +24,12 @@ typedef struct MatrixI {
 } MatrixI;
 typedef struct VectorF {
-    uint32_t nvalues;
-    long double *values;
+    uint32_t size;
+    LDouble *values;
 } VectorF;
 typedef struct VectorI {
-    uint32_t nvalues;
+    uint32_t size;
     uint32_t *values;
 } VectorI;
@@ -52,84 +54,82 @@ typedef struct RowParams {
 } RowParams;
 typedef struct {
-    long double mean;
-    long double variance;
+    LDouble mean;
+    LDouble variance;
 } SegmentStats;
-VALUE        rb_ckmeans_sorted_group_sizes(VALUE self);
-Arena       *arena_create(uint32_t);
-void        *arena_alloc(Arena*, uint32_t);
-void         arena_rewind(Arena*);
-void         arena_destroy(Arena*);
-MatrixF     *matrix_create_f(Arena*, uint32_t, uint32_t);
-MatrixI     *matrix_create_i(Arena*, uint32_t, uint32_t);
-void         matrix_set_f(MatrixF*, uint32_t, uint32_t, long double value);
-long double  matrix_get_f(MatrixF*, uint32_t, uint32_t);
-void         matrix_inspect_f(MatrixF*);
-void         matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
-uint32_t     matrix_get_i(MatrixI*, uint32_t, uint32_t);
-void         matrix_inspect_i(MatrixI*);
-VectorF     *vector_create_f(Arena*, uint32_t);
-void         vector_set_f(VectorF*, uint32_t offset, long double value);
-long double  vector_get_f(VectorF*, uint32_t offset);
-long double  vector_get_diff_f(VectorF*, uint32_t, uint32_t);
-void         vector_inspect_f(VectorF*);
-VectorI     *vector_create_i(Arena*, uint32_t);
-VectorI     *vector_dup_i(VectorI*, Arena*);
-void         vector_set_i(VectorI*, uint32_t offset, uint32_t value);
-uint32_t     vector_get_i(VectorI*, uint32_t offset);
-void         vector_downsize_i(VectorI*, uint32_t);
-void         vector_inspect_i(VectorI*);
-long double  dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
+VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
+Arena *arena_create(size_t);
+void  *arena_alloc(Arena*, size_t);
+void  arena_destroy(Arena*);
+MatrixF  *matrix_create_f(Arena*, uint32_t, uint32_t);
+MatrixI  *matrix_create_i(Arena*, uint32_t, uint32_t);
+void     matrix_set_f(MatrixF*, uint32_t, uint32_t, LDouble value);
+LDouble  matrix_get_f(MatrixF*, uint32_t, uint32_t);
+void     matrix_inspect_f(MatrixF*);
+void     matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
+uint32_t matrix_get_i(MatrixI*, uint32_t, uint32_t);
+void     matrix_inspect_i(MatrixI*);
+VectorF  *vector_create_f(Arena*, uint32_t);
+void     vector_set_f(VectorF*, uint32_t offset, LDouble value);
+LDouble  vector_get_f(VectorF*, uint32_t offset);
+LDouble  vector_get_diff_f(VectorF*, uint32_t, uint32_t);
+void     vector_inspect_f(VectorF*);
+VectorI  *vector_create_i(Arena*, uint32_t);
+VectorI  *vector_dup_i(VectorI*, Arena*);
+void     vector_set_i(VectorI*, uint32_t offset, uint32_t value);
+uint32_t vector_get_i(VectorI*, uint32_t offset);
+void     vector_downsize_i(VectorI*, uint32_t);
+void     vector_inspect_i(VectorI*);
+LDouble      dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
 void         fill_row(State, uint32_t, uint32_t, uint32_t);
 void         smawk(State, RowParams, VectorI*);
 void         find_min_from_candidates(State, RowParams, VectorI*);
-VectorI     *prune_candidates(State, RowParams, VectorI*);
+VectorI      *prune_candidates(State, RowParams, VectorI*);
 void         fill_even_positions(State, RowParams, VectorI*);
 SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
-VectorI     *backtrack_sizes(State, uint32_t);
+VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
 uint32_t     find_koptimal(State);
 void Init_extensions(void) {
-    VALUE ckmeans_module = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
+    VALUE ckmeans_module  = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
     VALUE clusterer_class = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
     rb_define_private_method(clusterer_class, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
 }
-# define ARENA_MIN_CAPACITY 1024
-# define ALLOCATION_FACTOR 20
+# define ARENA_MIN_CAPACITY 100
 # define PIx2 (M_PI * 2.0)
-VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
-    VALUE rb_xcount              = rb_ivar_get(self, rb_intern("@xcount"));
-    VALUE rb_kmin                = rb_ivar_get(self, rb_intern("@kmin"));
-    VALUE rb_kmax                = rb_ivar_get(self, rb_intern("@kmax"));
-    VALUE rb_xsorted             = rb_ivar_get(self, rb_intern("@xsorted"));
-    VALUE rb_apply_bic_deviation = rb_ivar_get(self, rb_intern("@apply_bic_deviation"));
-    uint32_t xcount              = NUM2UINT(rb_xcount);
-    uint32_t kmin                = NUM2UINT(rb_kmin);
-    uint32_t kmax                = NUM2UINT(rb_kmax);
-    bool apply_deviation         = RTEST(rb_apply_bic_deviation);
-    Arena *arena                 = arena_create(sizeof(int) * xcount * kmax * ALLOCATION_FACTOR);
-    if (arena == NULL) {
-        return Qnil;
-    }
+VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
+{
+    uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
+    uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
+    uint32_t kmax        = NUM2UINT(rb_iv_get(self, "@kmax"));
+    bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
+    VALUE rb_xsorted     = rb_iv_get(self, "@xsorted");
+    Arena *arena         =
+        arena_create(
+            sizeof(LDouble) * xcount * (kmax + 4) +
+            sizeof(uint32_t) * xcount * kmax * 5 +
+            ARENA_MIN_CAPACITY
+        );
+    if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
     MatrixF *cost    = matrix_create_f(arena, kmax, xcount);
     MatrixI *splits  = matrix_create_i(arena, kmax, xcount);
     VectorF *xsorted = vector_create_f(arena, xcount);
-    /* TODO: pack sums into one vector of pairs */
     VectorF *xsum    = vector_create_f(arena, xcount);
     VectorF *xsumsq  = vector_create_f(arena, xcount);
     for (uint32_t i = 0; i < xcount; i++) {
-        long double xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
+        LDouble xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
         vector_set_f(xsorted, i, xi);
     }
@@ -147,17 +147,17 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
     };
-    long double shift        = vector_get_f(xsorted, xcount / 2);
-    long double diff_initial = vector_get_f(xsorted, 0) - shift;
+    LDouble shift        = vector_get_f(xsorted, xcount / 2);
+    LDouble diff_initial = vector_get_f(xsorted, 0) - shift;
     vector_set_f(xsum, 0, diff_initial);
     vector_set_f(xsumsq, 0, diff_initial * diff_initial);
     for (uint32_t i = 1; i < xcount; i++) {
-        long double xi          = vector_get_f(xsorted, i);
-        long double xsum_prev   = vector_get_f(xsum, i - 1);
-        long double xsumsq_prev = vector_get_f(xsumsq, i - 1);
-        long double diff        = xi - shift;
+        LDouble xi          = vector_get_f(xsorted, i);
+        LDouble xsum_prev   = vector_get_f(xsum, i - 1);
+        LDouble xsumsq_prev = vector_get_f(xsumsq, i - 1);
+        LDouble diff        = xi - shift;
         vector_set_f(xsum, i, xsum_prev + diff);
         vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
@@ -172,7 +172,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
     uint32_t koptimal = find_koptimal(state);
-    VectorI *sizes = backtrack_sizes(state, koptimal);
+    VectorI *sizes = vector_create_i(arena, koptimal);
+    backtrack_sizes(state, sizes, koptimal);
     /* printf("XSORTED \t"); vector_inspect_f(xsorted); */
     /* printf("K OPTIMAL: %lld\n", koptimal); */
@@ -180,8 +181,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
     /* printf("FINAL COST\n"); matrix_inspect_f(cost); */
     /* printf("FINAL SPLITS\n"); matrix_inspect_i(splits); */
-    VALUE response = rb_ary_new2(sizes->nvalues);
-    for (uint32_t i = 0; i < sizes->nvalues; i++) {
+    VALUE response = rb_ary_new2(sizes->size);
+    for (uint32_t i = 0; i < sizes->size; i++) {
         VALUE size = LONG2NUM(vector_get_i(sizes, i));
         rb_ary_store(response, i, size);
     }
@@ -193,29 +194,30 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
 uint32_t find_koptimal(State state)
 {
-    uint32_t kmin           = state.kmin;
-    uint32_t kmax           = state.kmax;
-    uint32_t xcount         = state.xcount;
-    uint32_t kopt           = kmin;
-    uint32_t xindex_max     = state.xcount - 1;
-    VectorF *xsorted       = state.xsorted;
-    long double x0         = vector_get_f(xsorted, 0);
-    long double xn         = vector_get_f(xsorted, xindex_max);
-    long double max_bic    = 0.0;
-    long double adjustment = state.apply_deviation ? 0.0 : 1.0;
+    uint32_t kmin       = state.kmin;
+    uint32_t kmax       = state.kmax;
+    uint32_t xcount     = state.xcount;
+    uint32_t kopt       = kmin;
+    uint32_t xindex_max = state.xcount - 1;
+    VectorF *xsorted    = state.xsorted;
+    LDouble x0          = vector_get_f(xsorted, 0);
+    LDouble xn          = vector_get_f(xsorted, xindex_max);
+    LDouble max_bic     = 0.0;
+    LDouble xcount_log  = log((LDouble) xcount);
+    VectorI *sizes = vector_create_i(state.arena, kmax);
     for (uint32_t k = kmin; k <= kmax; k++) {
         uint32_t index_right, index_left = 0;
-        long double bin_left, bin_right, loglikelihood = 0.0;
-        VectorI *sizes = backtrack_sizes(state, k);
+        LDouble bin_left, bin_right, loglikelihood = 0.0;
+        backtrack_sizes(state, sizes, k);
         for (uint32_t kb = 0; kb < k; kb++) {
-            uint32_t npoints   = vector_get_i(sizes, kb);
-            index_right        = index_left + npoints - 1;
-            long double xleft  = vector_get_f(xsorted, index_left);
-            long double xright = vector_get_f(xsorted, index_right);
-            bin_left           = xleft;
-            bin_right          = xright;
+            uint32_t npoints = vector_get_i(sizes, kb);
+            index_right      = index_left + npoints - 1;
+            LDouble xleft    = vector_get_f(xsorted, index_left);
+            LDouble xright   = vector_get_f(xsorted, index_right);
+            bin_left         = xleft;
+            bin_right        = xright;
             if (xleft == xright) {
                 bin_left  = index_left == 0
@@ -226,18 +228,18 @@ uint32_t find_koptimal(State state)
                     : xn;
             }
-            long double bin_width = bin_right - bin_left;
-            SegmentStats stats    = shifted_data_variance(xsorted, index_left, index_right);
-            long double mean      = stats.mean;
-            long double variance  = stats.variance;
+            LDouble bin_width  = bin_right - bin_left;
+            SegmentStats stats = shifted_data_variance(xsorted, index_left, index_right);
+            LDouble mean       = stats.mean;
+            LDouble variance   = stats.variance;
             if (variance > 0) {
                 for (uint32_t i = index_left; i <= index_right; i++) {
-                    long double xi = vector_get_f(xsorted, i);
+                    LDouble xi     = vector_get_f(xsorted, i);
                     loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
                 }
                 loglikelihood += npoints * (
-                    (log(npoints / (long double) xcount) * adjustment) -
+                    (state.apply_deviation ? 0.0 : log(npoints / (LDouble) xcount)) -
                     (0.5 * log(PIx2 * variance))
                 );
             } else {
@@ -247,24 +249,23 @@ uint32_t find_koptimal(State state)
             index_left = index_right + 1;
         }
-        long double bic = (2.0 * loglikelihood) - (((3 * k) - 1) * log((long double) xcount));
+        LDouble bic = (2.0 * loglikelihood) - (((3 * k) - 1) * xcount_log);
         if (k == kmin) {
             max_bic = bic;
-            kopt = kmin;
+            kopt    = kmin;
         } else if (bic > max_bic) {
             max_bic = bic;
-            kopt = k;
+            kopt    = k;
         }
     }
     return kopt;
 }
-VectorI *backtrack_sizes(State state, uint32_t k)
+VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
 {
     MatrixI *splits = state.splits;
-    VectorI *sizes  = vector_create_i(state.arena, k);
     uint32_t xcount = state.xcount;
     uint32_t right  = xcount - 1;
     uint32_t left   = 0;
@@ -274,6 +275,7 @@ VectorI *backtrack_sizes(State state, uint32_t k)
         left = matrix_get_i(splits, i, right);
         vector_set_i(sizes, i, right - left + 1);
     }
+    // Special case outside of the loop removing the need for conditionals
     left = matrix_get_i(splits, 0, right);
     vector_set_i(sizes, 0, right - left + 1);
@@ -282,16 +284,16 @@ VectorI *backtrack_sizes(State state, uint32_t k)
 SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t right)
 {
-    const uint32_t n    = right - left + 1;
-    long double sum    = 0.0;
-    long double sumsq  = 0.0;
+    const uint32_t n   = right - left + 1;
+    LDouble sum        = 0.0;
+    LDouble sumsq      = 0.0;
     SegmentStats stats = { .mean = 0.0, .variance = 0.0 };
     if (right >= left) {
-        const long double median = vector_get_f(xsorted, (left + right) / 2);
+        const LDouble median = vector_get_f(xsorted, (left + right) / 2);
         for (uint32_t i = left; i <= right; i++) {
-            const long double sumi = vector_get_f(xsorted, i) - median;
+            const LDouble sumi = vector_get_f(xsorted, i) - median;
             sum   += sumi;
             sumsq += sumi * sumi;
@@ -306,7 +308,8 @@ SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t rig
     return stats;
 }
-void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax) {
+void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax)
+{
     uint32_t size = imax - q + 1;
     VectorI *split_candidates = vector_create_i(state.arena, size);
     for (uint32_t i = 0; i < size; i++) {
@@ -316,7 +319,8 @@ void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax) {
     smawk(state, rparams, split_candidates);
 }
-void smawk(State state, RowParams rparams, VectorI *split_candidates) {
+void smawk(State state, RowParams rparams, VectorI *split_candidates)
+{
     const uint32_t imin  = rparams.imin;
     const uint32_t imax  = rparams.imax;
     const uint32_t istep = rparams.istep;
@@ -326,9 +330,9 @@ void smawk(State state, RowParams rparams, VectorI *split_candidates) {
     } else {
         VectorI *odd_candidates = prune_candidates(state, rparams, split_candidates);
         /* printf("PRUNED\t"); vector_inspect_i(odd_candidates); */
-        uint32_t istepx2         = istep * 2;
-        uint32_t imin_odd        = imin + istep;
-        uint32_t imax_odd        = imin_odd + ((imax - imin_odd) / istepx2 * istepx2);
+        uint32_t istepx2        = istep * 2;
+        uint32_t imin_odd       = imin + istep;
+        uint32_t imax_odd       = imin_odd + ((imax - imin_odd) / istepx2 * istepx2);
         RowParams rparams_odd   = { .row = rparams.row, .imin = imin_odd, .imax = imax_odd, .istep = istepx2 };
         smawk(state, rparams_odd, odd_candidates);
@@ -342,32 +346,32 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
     uint32_t imin    = rparams.imin;
     uint32_t imax    = rparams.imax;
     uint32_t istep   = rparams.istep;
-    uint32_t n       = split_candidates->nvalues;
+    uint32_t n       = split_candidates->size;
     uint32_t istepx2 = istep * 2;
     uint32_t jl      = vector_get_i(split_candidates, 0);
-    VectorF *xsum   = state.xsum;
-    VectorF *xsumsq = state.xsumsq;
-    MatrixI *splits = state.splits;
+    VectorF *xsum    = state.xsum;
+    VectorF *xsumsq  = state.xsumsq;
+    MatrixI *splits  = state.splits;
     for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         while (vector_get_i(split_candidates, r) < jl) r++;
-        uint32_t rcandidate     = vector_get_i(split_candidates, r);
+        uint32_t rcandidate    = vector_get_i(split_candidates, r);
         uint32_t cost_base_row = row - 1;
         uint32_t cost_base_col = rcandidate - 1;
-        long double cost      =
+        LDouble cost           =
             matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissimilarity(rcandidate, i, xsum, xsumsq);
         matrix_set_f(state.cost, row, i, cost);
         matrix_set_i(state.splits, row, i, rcandidate);
-        uint32_t jh         =
+        uint32_t jh =
             (i + istep) <= imax
             ? matrix_get_i(splits, row, i + istep)
             : vector_get_i(split_candidates, n - 1);
-        uint32_t jmax       = jh < i ? jh : i;
-        long double sjimin = dissimilarity(jmax, i, xsum, xsumsq);
+        uint32_t jmax  = jh < i ? jh : i;
+        LDouble sjimin = dissimilarity(jmax, i, xsum, xsumsq);
         for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
             uint32_t jabs = vector_get_i(split_candidates, r);
@@ -375,9 +379,9 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
             if (jabs > i) break;
             if (jabs < matrix_get_i(splits, row - 1, i)) continue;
-            long double cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
-            long double sj        = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
-            long double cost_prev = matrix_get_f(state.cost, row, i);
+            LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
+            LDouble sj        = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
+            LDouble cost_prev = matrix_get_f(state.cost, row, i);
             if (sj <= cost_prev) {
                 matrix_set_f(state.cost, row, i, sj);
@@ -394,10 +398,10 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
 void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
 {
-    const uint32_t row     = rparams.row;
-    const uint32_t imin    = rparams.imin;
-    const uint32_t imax    = rparams.imax;
-    const uint32_t istep   = rparams.istep;
+    const uint32_t row    = rparams.row;
+    const uint32_t imin   = rparams.imin;
+    const uint32_t imax   = rparams.imax;
+    const uint32_t istep  = rparams.istep;
     MatrixF *const cost   = state.cost;
     MatrixI *const splits = state.splits;
@@ -408,19 +412,19 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can
         const uint32_t optimal_split_idx = optimal_split_idx_prev;
         const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
-        const long double added_cost    = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
+        const LDouble added_cost         = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
         matrix_set_f(cost, row, i, cost_prev + added_cost);
         matrix_set_i(splits, row, i, optimal_split);
-        for (uint32_t r = optimal_split_idx + 1; r < split_candidates->nvalues; r++)
+        for (uint32_t r = optimal_split_idx + 1; r < split_candidates->size; r++)
         {
             uint32_t split = vector_get_i(split_candidates, r);
             if (split < matrix_get_i(splits, row - 1, i)) continue;
             if (split > i) break;
-            long double split_cost =
+            LDouble split_cost =
                 matrix_get_f(cost, row - 1, split - 1) + dissimilarity(split, i, state.xsum, state.xsumsq);
             if (split_cost > matrix_get_f(cost, row, i)) continue;
@@ -434,39 +438,39 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can
 VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
 {
-    uint32_t n = ((rparams.imax - rparams.imin) / rparams.istep) + 1;
-    uint32_t m = split_candidates->nvalues;
+    uint32_t imin  = rparams.imin;
+    uint32_t row   = rparams.row;
+    uint32_t istep = rparams.istep;
+    uint32_t n     = ((rparams.imax - imin) / istep) + 1;
+    uint32_t m     = split_candidates->size;
     if (n >= m) return split_candidates;
-    int32_t left    = -1;
+    uint32_t left   = 0;
     uint32_t right  = 0;
     VectorI *pruned = vector_dup_i(split_candidates, state.arena);
     while (m > n)
     {
-        uint32_t p         = left + 1;
-        uint32_t i         = rparams.imin + p * rparams.istep;
-        uint32_t j         = vector_get_i(pruned, right);
-        uint32_t jnext     = vector_get_i(pruned, right + 1);
-        long double sl    =
-            matrix_get_f(state.cost, rparams.row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
-        long double snext =
-            matrix_get_f(state.cost, rparams.row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
-        if ((sl < snext) && (p < n - 1)) {
+        uint32_t i     = imin + left * istep;
+        uint32_t j     = vector_get_i(pruned, right);
+        uint32_t jnext = vector_get_i(pruned, right + 1);
+        LDouble sl     =
+            matrix_get_f(state.cost, row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
+        LDouble snext  =
+            matrix_get_f(state.cost, row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
+        if ((sl < snext) && (left < n - 1)) {
+            vector_set_i(pruned, left, j);
             left++;
             right++;
-            vector_set_i(pruned, left, j);
-        } else if ((sl < snext) && (p == n - 1)) {
+        } else if ((sl < snext) && (left == n - 1)) {
             right++;
             m--;
             vector_set_i(pruned, right, j);
         } else {
-            if (p > 0) {
-                /* TODO: extract `vector_setcpy_T` */
-                vector_set_i(pruned, right, vector_get_i(pruned, left));
-                left--;
+            if (left > 0) {
+                vector_set_i(pruned, right, vector_get_i(pruned, --left));
             } else {
                 right++;
             }
@@ -475,8 +479,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida
         }
     }
-    for (uint32_t i = left + 1; i < m; i++) {
-        /* TODO: extract `vector_setcpy_T` */
+    for (uint32_t i = left; i < m; i++) {
         vector_set_i(pruned, i, vector_get_i(pruned, right++));
     }
@@ -485,101 +488,86 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida
     return pruned;
 }
-long double dissimilarity(uint32_t j, uint32_t i, VectorF *xsum, VectorF *xsumsq) {
-    long double sji = 0.0;
+inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
+    LDouble sji = 0.0;
     if (j >= i) return sji;
     if (j > 0) {
-        /* TODO: looks more like `segment_delta` */
-        long double segment_sum = vector_get_diff_f(xsum, i, j - 1);
-        uint32_t segment_size    = i - j + 1;
-        sji                     = vector_get_diff_f(xsumsq, i, j - 1) - (segment_sum * segment_sum / segment_size);
+        LDouble segment_diff  = vector_get_diff_f(xsum, i, j - 1);
+        uint32_t segment_size = i - j + 1;
+        sji                   = vector_get_diff_f(xsumsq, i, j - 1) - (segment_diff * segment_diff / segment_size);
     } else {
-        long double xsumi = vector_get_f(xsum, i);
-        sji               = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
+        LDouble xsumi = vector_get_f(xsum, i);
+        sji           = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
     }
     return (sji > 0) ? sji : 0.0;
 }
-VectorF *vector_create_f(Arena *arena, uint32_t nvalues) {
+inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
     VectorF *v;
-    v          = arena_alloc(arena, sizeof(*v));
-    v->values  = arena_alloc(arena, sizeof(*(v->values)) * nvalues);
-    v->nvalues = nvalues;
+    v         = arena_alloc(arena, sizeof(*v));
+    v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
+    v->size   = size;
     return v;
 }
-VectorI *vector_create_i(Arena *arena, uint32_t nvalues) {
+inline VectorI *vector_create_i(Arena *arena, uint32_t size) {
     VectorI *v;
-    v          = arena_alloc(arena, sizeof(*v));
-    v->values  = arena_alloc(arena, sizeof(*(v->values)) * nvalues);
-    v->nvalues = nvalues;
+    v         = arena_alloc(arena, sizeof(*v));
+    v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
+    v->size   = size;
     return v;
 }
-VectorI *vector_dup_i(VectorI *v, Arena *arena)
+inline VectorI *vector_dup_i(VectorI *v, Arena *arena)
 {
-    VectorI *vdup = vector_create_i(arena, v->nvalues);
+    VectorI *vdup = vector_create_i(arena, v->size);
-    /* TODO: use one memcpy call */
-    for (uint32_t i = 0; i < v->nvalues; i++) {
-        vector_set_i(vdup, i, vector_get_i(v, i));
-    }
+    memcpy(vdup->values, v->values, sizeof(*(v->values)) * v->size);
     return vdup;
 }
-void vector_set_f(VectorF *v, uint32_t offset, long double value) {
-    assert(offset < v->nvalues && "[vector_set_f] element index should be less than nvalues");
+inline void vector_set_f(VectorF *v, uint32_t offset, LDouble value) {
     *(v->values + offset) = value;
 }
-void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
-    assert(offset < v->nvalues && "[vector_set_i] element index should be less than nvalues");
+inline void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
     *(v->values + offset) = value;
 }
-uint32_t vector_get_i(VectorI *v, uint32_t offset) {
-    assert(offset < v->nvalues && "[vector_get_i] element index should be less than nvalues");
+inline uint32_t vector_get_i(VectorI *v, uint32_t offset) {
     return *(v->values + offset);
 }
-void vector_downsize_i(VectorI *v, uint32_t new_size) {
-    v->nvalues = new_size;
+inline void vector_downsize_i(VectorI *v, uint32_t new_size) {
+    v->size = new_size;
 }
 void vector_inspect_i(VectorI *v) {
-    for (uint32_t i = 0; i < v->nvalues - 1; i++)
+    for (uint32_t i = 0; i < v->size - 1; i++)
         printf("%u, ", vector_get_i(v, i));
-    printf("%u\n", vector_get_i(v, v->nvalues - 1));
+    printf("%u\n", vector_get_i(v, v->size - 1));
 }
-long double vector_get_f(VectorF *v, uint32_t offset) {
-    assert(offset < v->nvalues && "[vector_get_f] element index should be less than nvalues");
+inline LDouble vector_get_f(VectorF *v, uint32_t offset) {
     return *(v->values + offset);
 }
-long double vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
-    assert(i < v->nvalues && "[vector_get_diff_f] i should be less than nvalues");
-    assert(j < v->nvalues && "[vector_get_diff_f] j should be less than nvalues");
+inline LDouble vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
     return *(v->values + i) - *(v->values + j);
 }
 void vector_inspect_f(VectorF *v) {
-    for (uint32_t i = 0; i < v->nvalues - 1; i++)
+    for (uint32_t i = 0; i < v->size - 1; i++)
         printf("%Lf, ", vector_get_f(v, i));
-    printf("%Lf\n", vector_get_f(v, v->nvalues - 1));
+    printf("%Lf\n", vector_get_f(v, v->size - 1));
 }
 MatrixF *matrix_create_f(Arena *arena, uint32_t nrows, uint32_t ncols) {
@@ -604,18 +592,12 @@ MatrixI *matrix_create_i(Arena *arena, uint32_t nrows, uint32_t ncols) {
     return m;
 }
-void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, long double value) {
-    assert(i < m->nrows && "[matrix_set_f] row offset should be less than nrows");
-    assert(j < m->cols &&  "[matrix_set_f] col offset should be less than ncols");
+inline void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, LDouble value) {
     uint32_t offset = i * m->ncols + j;
     *(m->values + offset) = value;
 }
-long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
-    assert(i < m->nrows && "[matrix_get_f] row offset should be less than nrows");
-    assert(j < m->cols &&  "[matrix_get_f] col offset should be less than ncols");
+inline LDouble matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
     uint32_t offset = i * m->ncols + j;
     return *(m->values + offset);
 }
@@ -623,7 +605,7 @@ long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
 void matrix_inspect_f(MatrixF *m) {
     for (uint32_t i = 0; i < m->nrows; i++) {
         for (uint32_t j = 0; j < m->ncols - 1; j++) {
-            long double value = matrix_get_f(m, i, j);
+            LDouble value = matrix_get_f(m, i, j);
             printf("%Lf, ", value);
         }
@@ -639,23 +621,17 @@ void matrix_inspect_i(MatrixI *m) {
     }
 }
-void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
-    assert(i < m->nrows && "[matrix_set_i] row offset should be less than nrows");
-    assert(j < m->cols &&  "[matrix_set_i] col offset should be less than ncols");
+inline void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
     uint32_t offset = i * m->ncols + j;
     *(m->values + offset) = value;
 }
-uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
-    assert(i < m->nrows && "[matrix_get_i] row offset should be less than nrows");
-    assert(j < m->cols &&  "[matrix_get_i] col offset should be less than ncols");
+inline uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
     uint32_t offset = i * m->ncols + j;
     return *(m->values + offset);
 }
-Arena *arena_create(uint32_t capacity) {
+Arena *arena_create(size_t capacity) {
     if (capacity < ARENA_MIN_CAPACITY) {
         capacity = ARENA_MIN_CAPACITY;
     }
@@ -683,11 +659,11 @@ Arena *arena_create(uint32_t capacity) {
     return arena;
 }
-void *arena_alloc(Arena *arena, uint32_t size) {
+void *arena_alloc(Arena *arena, size_t size) {
     size = (size + 7) & ~7;
     if (arena->offset + size > arena->capacity) {
-        printf("Arena Out Of Memory\n");
+        rb_raise(rb_eNoMemError, "Arena Insufficient Capacity");
         return NULL;
     }
@@ -698,7 +674,8 @@ void *arena_alloc(Arena *arena, uint32_t size) {
 }
 void arena_destroy(Arena *arena) {
-    /* printf("[Arena Destroy] Capacity: %u, offset: %u, left: %u\n", arena->capacity, arena->offset, arena->capacity - arena->offset); */
+    /* double leftover = ((double) arena->capacity - arena->offset) / arena->capacity * 100; */
+    /* printf("[Arena Destroy] Capacity: %zu, offset: %zu, left: %2.2f%%\n", arena->capacity, arena->offset, leftover); */
     free(arena->buffer);
     free(arena);
 }

data/lib/ckmeans/clusterer.rb CHANGED Viewed

@@ -1,9 +1,7 @@
 # frozen_string_literal: true
 module Ckmeans
-  class Clusterer # rubocop:disable Style/Documentation, Metrics/ClassLength
-    PI_DOUBLE = Math::PI * 2
+  class Clusterer # rubocop:disable Style/Documentation
     def initialize(entries, kmin, kmax = kmin, kestimate = :regular)
       @xcount = entries.size
@@ -26,301 +24,7 @@ module Ckmeans
           sorted_group_sizes.each_with_object([]) do |size, groups|
             groups << @xsorted_original.shift(size)
           end
-=begin # rubocop:disable Style/BlockComments
-          @cost   = Array.new(kmax) { Array.new(xcount) { 0.0 } }
-          @splits = Array.new(kmax) { Array.new(xcount) { 0 } }
-          @xsum   = Array.new(xcount)
-          @xsumsq = Array.new(xcount)
-          shift     = xsorted[xcount / 2]
-          xsum[0]   = xsorted[0].to_f - shift
-          xsumsq[0] = xsum[0]**2
-          1.upto(xcount - 1) do |i|
-            xf = xsorted[i].to_f
-            xsum[i]      = xsum[i - 1] + xf - shift
-            xsumsq[i]    = xsumsq[i - 1] + ((xf - shift) * (xf - shift))
-            cost[0][i]   = dissim(0, i)
-            splits[0][i] = 0
-          end
-          kmax_idx = kmax - 1
-          1.upto(kmax_idx) do |q|
-            imin = q < kmax_idx ? [1, q].max : xcount - 1
-            fill_row(q, imin, xcount - 1)
-          end
-          kopt = koptimal
-          puts "RB COST\n", cost.map(&:inspect)
-          puts "RB SPLITS\n", splits.map(&:inspect)
-          puts "RB K OPTIMAL: #{kopt}"
-          backtrack(kopt).each_with_object(Array.new(kopt)) do |(q, left, right), res|
-            res[q] = xsorted[left..right]
-          end
-=end
-        end
-    end
-    private
-    attr_reader :cost, :splits, :xsum, :xsumsq, :xcount, :xsorted, :kmin, :kmax
-    def koptimal # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
-      kopt       = kmin
-      n          = xcount
-      max_bic    = 0.0
-      adjustment = kestimate == :sensitive ? 0.0 : 1.0 # Deviation from BIC formula to favor smaller clusters
-      kmin.upto(kmax) do |k|
-        sizes = backtrack(k).each_with_object(Array.new(k)) { |(q, left, right), sz| sz[q] = right - left + 1 }
-        index_left    = 0
-        index_right   = nil
-        loglikelihood = 0.0
-        bin_left      = nil
-        bin_right     = nil
-        k.times do |kb|
-          num_points_in_bin = sizes[kb]
-          index_right = index_left + num_points_in_bin - 1
-          if xsorted[index_left] < xsorted[index_right]
-            bin_left  = xsorted[index_left]
-            bin_right = xsorted[index_right]
-          elsif xsorted[index_left] == xsorted[index_right]
-            bin_left  = index_left == 0 ? xsorted[0] : (xsorted[index_left - 1] + xsorted[index_left]) / 2.0
-            bin_right = index_right < n - 1 ? (xsorted[index_right] + xsorted[index_right + 1]) / 2.0 : xsorted[n - 1]
-          else
-            raise "ERROR: binLeft > binRight"
-          end
-          bin_width = bin_right.to_f - bin_left
-          mean, variance = shifted_data_variance(index_left, index_right)
-          if variance > 0
-            (index_left..index_right).each do |i|
-              loglikelihood += -(xsorted[i] - mean) * (xsorted[i] - mean) / (2.0 * variance)
-            end
-            loglikelihood +=
-              num_points_in_bin *
-              ((Math.log(num_points_in_bin / n.to_f) * adjustment) - (0.5 * Math.log(PI_DOUBLE * variance)))
-          else
-            loglikelihood += num_points_in_bin * Math.log(1.0 / bin_width / n)
-          end
-          index_left = index_right + 1
-        end
-        bic = (2.0 * loglikelihood) - (((3 * k) - 1) * Math.log(n.to_f))
-        if k == kmin
-          max_bic = bic
-          kopt    = kmin
-        elsif bic > max_bic
-          max_bic = bic
-          kopt    = k
-        end
-      end
-      kopt
-    end
-    def shifted_data_variance(ileft, iright)
-      sum      = 0.0
-      sumsq    = 0.0
-      mean     = 0.0
-      variance = 0.0
-      n        = iright - ileft + 1
-      if iright >= ileft
-        median = xsorted[(ileft + iright) / 2].to_f
-        ileft.upto(iright) do |i|
-          sumi   = xsorted[i] - median
-          sum   += sumi
-          sumsq += sumi**2
-        end
-        mean     = (sum / n) + median
-        variance = (sumsq - (sum * sum / n)) / (n - 1) if n > 1
-      end
-      [mean, variance]
-    end
-    def backtrack(k)
-      return to_enum(__method__, k) unless block_given?
-      right = xcount - 1
-      left  = nil
-      (k - 1).downto(0) do |q|
-        left = splits[q][right]
-        yield q, left, right
-        right = left - 1 if q > 0
-      end
-    end
-    def dissim(j, i)
-      return 0.0 if j >= i
-      sji =
-        if j > 0
-          segment_sum = xsum[i] - xsum[j - 1]
-          segment_size = i - j + 1
-          xsumsq[i] - xsumsq[j - 1] - (segment_sum * segment_sum / segment_size)
-        else
-          xsumsq[i] - (xsum[i] * xsum[i] / (i + 1))
-        end
-      [0, sji].max
-    end
-    def fill_row(q, imin, imax)
-      size = imax - q + 1
-      js = Array.new(size) { |i| q + i }
-      smawk(imin, imax, 1, q, js)
-    end
-    def smawk(imin, imax, istep, q, js)
-      if (imax - imin) <= (0 * istep)
-        find_min_from_candidates(q, imin, imax, istep, js)
-      else
-        js_odd = prune_candidates(imin, imax, istep, q, js)
-        # puts "Pruned: #{js_odd.inspect}"
-        istepx2 = istep * 2
-        imin_odd = imin + istep
-        imax_odd = imin_odd + ((imax - imin_odd) / istepx2 * istepx2)
-        smawk(imin_odd, imax_odd, istepx2, q, js_odd)
-        fill_even_positions(imin, imax, istep, q, js)
-      end
-    end
-    def find_min_from_candidates(q, imin, imax, istep, js)
-      optimal_split_index_prev = 0
-      (imin..imax).step(istep) do |i|
-        optimal_split_index = optimal_split_index_prev
-        optimal_split       = js[optimal_split_index]
-        cost[q][i]          = cost[q - 1][optimal_split - 1] + dissim(optimal_split, i)
-        splits[q][i]        = optimal_split
-        ((optimal_split_index + 1)...js.size).each do |split_index|
-          jabs = js[split_index]
-          next if jabs < splits[q - 1][i]
-          break if jabs > i
-          sj = cost[q - 1][jabs - 1] + dissim(jabs, i)
-          next unless sj <= cost[q][i]
-          cost[q][i]               = sj
-          splits[q][i]             = js[split_index]
-          optimal_split_index_prev = split_index
-        end
-      end
-    end
-    def prune_candidates(imin, imax, istep, q, js)
-      n = ((imax - imin) / istep) + 1
-      m = js.size
-      return js if n >= m
-      pruned = js.dup
-      left = -1
-      right = 0
-      while m > n
-        p     = left + 1
-        i     = imin + (p * istep)
-        j     = pruned[right]
-        jnext = pruned[right + 1]
-        sl    = cost[q - 1][j - 1] + dissim(j, i)
-        snext = cost[q - 1][jnext - 1] + dissim(jnext, i)
-        if (sl < snext) && (p < n - 1)
-          left += 1
-          pruned[left] = j
-          right += 1
-        elsif (sl < snext) && (p == n - 1)
-          right += 1
-          pruned[right] = j
-          m -= 1
-        else
-          if p > 0
-            pruned[right] = pruned[left]
-            left -= 1
-          else
-            right += 1
-          end
-          m -= 1
-        end
-      end
-      ((left + 1)...m).each do |r|
-        pruned[r] = pruned[right]
-        right += 1
-      end
-      pruned.slice!(m..-1) if pruned.size > m
-      pruned
-    end
-    def fill_even_positions(imin, imax, istep, q, js)
-      n = js.size
-      istepx2 = istep * 2
-      jl = js[0]
-      i = imin
-      r = 0
-      while i <= imax
-        r += 1 while js[r] < jl
-        cost[q][i]   = cost[q - 1][js[r] - 1] + dissim(js[r], i)
-        splits[q][i] = js[r]
-        jh           = (i + istep) <= imax ? splits[q][i + istep] : js[n - 1]
-        jmax         = [jh, i].min
-        sjimin       = dissim(jmax, i)
-        r += 1
-        while r < n && js[r] <= jmax
-          jabs = js[r]
-          break if jabs > i
-          if jabs < splits[q - 1][i]
-            r += 1
-            next
-          end
-          cost_base = cost[q - 1][jabs - 1]
-          sj        = cost_base + dissim(jabs, i)
-          if sj <= cost[q][i]
-            cost[q][i]   = sj
-            splits[q][i] = jabs
-          elsif cost_base + sjimin > cost[q][i]
-            break
-          end
-          r += 1
         end
-        r -= 1
-        jl = jh
-        i += istepx2
-      end
     end
   end
 end

data/lib/ckmeans/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Ckmeans
-  VERSION = "1.0.1"
+  VERSION = "1.0.3"
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: ckmeans
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.3
 platform: ruby
 authors:
 - Vlad Lebedev
 bindir: exe
 cert_chain: []
-date: 2025-04-22 00:00:00.000000000 Z
+date: 2025-05-01 00:00:00.000000000 Z
 dependencies: []
 description: Repeatable clustering of unidimensional data
 email:
@@ -17,11 +17,13 @@ extensions:
 - ext/ckmeans/extconf.rb
 extra_rdoc_files: []
 files:
+- ".dockerignore"
 - ".rspec"
 - ".rubocop.yml"
 - ".rubocop_todo.yml"
 - ".ruby-version"
 - CHANGELOG.md
+- Dockerfile
 - LICENSE
 - README.md
 - Rakefile