ckmeans 1.0.3 → 1.1.0
This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in that public registry.
- checksums.yaml +4 -4
 - data/.ruby-version +1 -1
 - data/CHANGELOG.md +16 -3
 - data/README.md +10 -3
 - data/ext/ckmeans/extensions.c +90 -27
 - data/lib/ckmeans/clusterer.rb +0 -2
 - data/lib/ckmeans/version.rb +1 -1
 - data/lib/ckmeans.rb +2 -0
 - data/lib/ckmedian/clusterer.rb +29 -0
 - metadata +7 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 508f78311a643e1fa8e693e4abf1cdf6df4eb06ff09756fa534ff4a514d0f34f
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 4ef313387c2e45df4a8afde58e429093023a555a32f4af395a8b79c048a9d98d
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: ae0f1aff4bd6a78da04123d3728234012d0692ec22396b9529b245c8fa473343314508f053ee02ac876131b243704316948f476840d3c495d4e72eba68e095fd
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: ab95cfdacac4d9204887d4d5c5a7b85aafa3c869ec4b7a851ae994d8f15ddf096e99cb2d1691e6120a7a6bbe51cbde0524c93ad09b5811b7561c43c60a49256c
         
     | 
    
        data/.ruby-version
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            3.2. 
     | 
| 
      
 1 
     | 
    
         
            +
            3.2.8
         
     | 
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -1,9 +1,22 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ## [Unreleased]
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
            ## [1.0. 
     | 
| 
      
 3 
     | 
    
         
            +
            ## [1.0.4] - 2025-05-01
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
            - https://github.com/vlebedeff/rb-ckmeans/pull/ 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
      
 5 
     | 
    
         
            +
            - Simpler capacity size expression ([#14](https://github.com/vlebedeff/rb-ckmeans/pull/14))
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## [1.0.3] - 2025-05-01
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            - More frugal memory allocation ([#11](https://github.com/vlebedeff/rb-ckmeans/pull/11))
         
     | 
| 
      
 10 
     | 
    
         
            +
            - Use `rb_iv_get` for brevity
         
     | 
| 
      
 11 
     | 
    
         
            +
            - Various optimizations ([#10](https://github.com/vlebedeff/rb-ckmeans/pull/10))
         
     | 
| 
      
 12 
     | 
    
         
            +
            - Extract `LDouble` type definition
         
     | 
| 
      
 13 
     | 
    
         
            +
            - Remove `ruby-prof` gem
         
     | 
| 
      
 14 
     | 
    
         
            +
            - Rename `nvalues` to `size`
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            ## [1.0.2] - 2025-04-24
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            - Polish & Housekeeping ([#9](https://github.com/vlebedeff/rb-ckmeans/pull/9))
         
     | 
| 
      
 19 
     | 
    
         
            +
            - Fix int variable sign ([#8](https://github.com/vlebedeff/rb-ckmeans/pull/8))
         
     | 
| 
       7 
20 
     | 
    
         | 
| 
       8 
21 
     | 
    
         
             
            ## [1.0.0] - 2025-04-22
         
     | 
| 
       9 
22 
     | 
    
         | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -19,9 +19,16 @@ gem install ckmeans 
     | 
|
| 
       19 
19 
     | 
    
         
             
            ## Usage
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
            ```rb
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
            Ckmeans::Clusterer(data, kmin 
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
      
 22 
     | 
    
         
            +
            # Fixed cluster count
         
     | 
| 
      
 23 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin).clusters
         
     | 
| 
      
 24 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin).clusters
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            # Estimate optimal cluster count within kmin and kmax
         
     | 
| 
      
 27 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 28 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            # Adjust Bayesian Information Criteria favoring more smaller clusters (Ckmeans only)
         
     | 
| 
      
 31 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax, :sensitive).clusters
         
     | 
| 
       25 
32 
     | 
    
         
             
            ```
         
     | 
| 
       26 
33 
     | 
    
         | 
| 
       27 
34 
     | 
    
         
             
            ## License
         
     | 
    
        data/ext/ckmeans/extensions.c
    CHANGED
    
    | 
         @@ -33,6 +33,8 @@ typedef struct VectorI { 
     | 
|
| 
       33 
33 
     | 
    
         
             
                uint32_t *values;
         
     | 
| 
       34 
34 
     | 
    
         
             
            } VectorI;
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
      
 36 
     | 
    
         
            +
            typedef LDouble (FnDissim)(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
       36 
38 
     | 
    
         
             
            typedef struct State {
         
     | 
| 
       37 
39 
     | 
    
         
             
                uint32_t xcount;
         
     | 
| 
       38 
40 
     | 
    
         
             
                uint32_t kmin;
         
     | 
| 
         @@ -44,6 +46,7 @@ typedef struct State { 
     | 
|
| 
       44 
46 
     | 
    
         
             
                MatrixI *splits;
         
     | 
| 
       45 
47 
     | 
    
         
             
                VectorF *xsum;
         
     | 
| 
       46 
48 
     | 
    
         
             
                VectorF *xsumsq;
         
     | 
| 
      
 49 
     | 
    
         
            +
                FnDissim *dissim;
         
     | 
| 
       47 
50 
     | 
    
         
             
            } State;
         
     | 
| 
       48 
51 
     | 
    
         | 
| 
       49 
52 
     | 
    
         
             
            typedef struct RowParams {
         
     | 
| 
         @@ -59,6 +62,8 @@ typedef struct { 
     | 
|
| 
       59 
62 
     | 
    
         
             
            } SegmentStats;
         
     | 
| 
       60 
63 
     | 
    
         | 
| 
       61 
64 
     | 
    
         
             
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 65 
     | 
    
         
            +
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 66 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim*);
         
     | 
| 
       62 
67 
     | 
    
         | 
| 
       63 
68 
     | 
    
         
             
            Arena *arena_create(size_t);
         
     | 
| 
       64 
69 
     | 
    
         
             
            void  *arena_alloc(Arena*, size_t);
         
     | 
| 
         @@ -85,7 +90,8 @@ uint32_t vector_get_i(VectorI*, uint32_t offset); 
     | 
|
| 
       85 
90 
     | 
    
         
             
            void     vector_downsize_i(VectorI*, uint32_t);
         
     | 
| 
       86 
91 
     | 
    
         
             
            void     vector_inspect_i(VectorI*);
         
     | 
| 
       87 
92 
     | 
    
         | 
| 
       88 
     | 
    
         
            -
            LDouble      dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
     | 
| 
      
 93 
     | 
    
         
            +
            LDouble      dissimilarity_l2(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 94 
     | 
    
         
            +
            LDouble      dissimilarity_l1(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
       89 
95 
     | 
    
         
             
            void         fill_row(State, uint32_t, uint32_t, uint32_t);
         
     | 
| 
       90 
96 
     | 
    
         
             
            void         smawk(State, RowParams, VectorI*);
         
     | 
| 
       91 
97 
     | 
    
         
             
            void         find_min_from_candidates(State, RowParams, VectorI*);
         
     | 
| 
         @@ -95,30 +101,40 @@ SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t); 
     | 
|
| 
       95 
101 
     | 
    
         
             
            VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
         
     | 
| 
       96 
102 
     | 
    
         
             
            uint32_t     find_koptimal(State);
         
     | 
| 
       97 
103 
     | 
    
         | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
       98 
105 
     | 
    
         
             
            void Init_extensions(void) {
         
     | 
| 
       99 
     | 
    
         
            -
                VALUE ckmeans_module 
     | 
| 
       100 
     | 
    
         
            -
                VALUE  
     | 
| 
      
 106 
     | 
    
         
            +
                VALUE ckmeans_module     = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
         
     | 
| 
      
 107 
     | 
    
         
            +
                VALUE ckmedian_module    = rb_const_get(rb_cObject, rb_intern("Ckmedian"));
         
     | 
| 
      
 108 
     | 
    
         
            +
                VALUE ckmeans_clusterer  = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
         
     | 
| 
      
 109 
     | 
    
         
            +
                VALUE ckmedian_clusterer = rb_const_get(ckmedian_module, rb_intern("Clusterer"));
         
     | 
| 
       101 
110 
     | 
    
         | 
| 
       102 
     | 
    
         
            -
                rb_define_private_method( 
     | 
| 
      
 111 
     | 
    
         
            +
                rb_define_private_method(ckmeans_clusterer, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
         
     | 
| 
      
 112 
     | 
    
         
            +
                rb_define_private_method(ckmedian_clusterer, "sorted_group_sizes", rb_ckmedian_sorted_group_sizes, 0);
         
     | 
| 
       103 
113 
     | 
    
         
             
            }
         
     | 
| 
       104 
114 
     | 
    
         | 
| 
       105 
115 
     | 
    
         
             
            # define ARENA_MIN_CAPACITY 100
         
     | 
| 
      
 116 
     | 
    
         
            +
            # define ALLOCATION_FACTOR 3
         
     | 
| 
       106 
117 
     | 
    
         
             
            # define PIx2 (M_PI * 2.0)
         
     | 
| 
       107 
118 
     | 
    
         | 
| 
       108 
119 
     | 
    
         
             
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
         
     | 
| 
      
 120 
     | 
    
         
            +
            {
         
     | 
| 
      
 121 
     | 
    
         
            +
                return rb_sorted_group_sizes(self, dissimilarity_l2);
         
     | 
| 
      
 122 
     | 
    
         
            +
            }
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self)
         
     | 
| 
      
 125 
     | 
    
         
            +
            {
         
     | 
| 
      
 126 
     | 
    
         
            +
                return rb_sorted_group_sizes(self, dissimilarity_l1);
         
     | 
| 
      
 127 
     | 
    
         
            +
            }
         
     | 
| 
      
 128 
     | 
    
         
            +
             
     | 
| 
      
 129 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
         
     | 
| 
       109 
130 
     | 
    
         
             
            {
         
     | 
| 
       110 
131 
     | 
    
         
             
                uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
         
     | 
| 
       111 
132 
     | 
    
         
             
                uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
         
     | 
| 
       112 
133 
     | 
    
         
             
                uint32_t kmax        = NUM2UINT(rb_iv_get(self, "@kmax"));
         
     | 
| 
       113 
134 
     | 
    
         
             
                bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
         
     | 
| 
       114 
135 
     | 
    
         
             
                VALUE rb_xsorted     = rb_iv_get(self, "@xsorted");
         
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                Arena *arena         =
         
     | 
| 
       117 
     | 
    
         
            -
                    arena_create(
         
     | 
| 
       118 
     | 
    
         
            -
                        sizeof(LDouble) * xcount * (kmax + 4) +
         
     | 
| 
       119 
     | 
    
         
            -
                        sizeof(uint32_t) * xcount * kmax * 5 +
         
     | 
| 
       120 
     | 
    
         
            -
                        ARENA_MIN_CAPACITY
         
     | 
| 
       121 
     | 
    
         
            -
                    );
         
     | 
| 
      
 136 
     | 
    
         
            +
                size_t capacity      = sizeof(LDouble) * (xcount + 1) * (kmax + 1) * ALLOCATION_FACTOR + ARENA_MIN_CAPACITY;
         
     | 
| 
      
 137 
     | 
    
         
            +
                Arena *arena         = arena_create(capacity);
         
     | 
| 
       122 
138 
     | 
    
         | 
| 
       123 
139 
     | 
    
         
             
                if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
         
     | 
| 
       124 
140 
     | 
    
         | 
| 
         @@ -143,7 +159,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       143 
159 
     | 
    
         
             
                    .cost            = cost,
         
     | 
| 
       144 
160 
     | 
    
         
             
                    .splits          = splits,
         
     | 
| 
       145 
161 
     | 
    
         
             
                    .xsum            = xsum,
         
     | 
| 
       146 
     | 
    
         
            -
                    .xsumsq          = xsumsq
         
     | 
| 
      
 162 
     | 
    
         
            +
                    .xsumsq          = xsumsq,
         
     | 
| 
      
 163 
     | 
    
         
            +
                    .dissim          = criteria
         
     | 
| 
       147 
164 
     | 
    
         
             
                };
         
     | 
| 
       148 
165 
     | 
    
         | 
| 
       149 
166 
     | 
    
         | 
| 
         @@ -161,7 +178,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       161 
178 
     | 
    
         | 
| 
       162 
179 
     | 
    
         
             
                    vector_set_f(xsum, i, xsum_prev + diff);
         
     | 
| 
       163 
180 
     | 
    
         
             
                    vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
         
     | 
| 
       164 
     | 
    
         
            -
                    matrix_set_f(cost, 0, i, dissimilarity(0, i, xsum, xsumsq));
     | 
| 
      
 181 
     | 
    
         
            +
                    matrix_set_f(cost, 0, i, criteria(0, i, xsum, xsumsq));
         
     | 
| 
       165 
182 
     | 
    
         
             
                    matrix_set_i(splits, 0, i, 0);
         
     | 
| 
       166 
183 
     | 
    
         
             
                }
         
     | 
| 
       167 
184 
     | 
    
         | 
| 
         @@ -340,7 +357,7 @@ void smawk(State state, RowParams rparams, VectorI *split_candidates) 
     | 
|
| 
       340 
357 
     | 
    
         
             
                }
         
     | 
| 
       341 
358 
     | 
    
         
             
            }
         
     | 
| 
       342 
359 
     | 
    
         | 
| 
       343 
     | 
    
         
            -
            void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 360 
     | 
    
         
            +
            inline void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       344 
361 
     | 
    
         
             
            {
         
     | 
| 
       345 
362 
     | 
    
         
             
                uint32_t row     = rparams.row;
         
     | 
| 
       346 
363 
     | 
    
         
             
                uint32_t imin    = rparams.imin;
         
     | 
| 
         @@ -349,9 +366,10 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       349 
366 
     | 
    
         
             
                uint32_t n       = split_candidates->size;
         
     | 
| 
       350 
367 
     | 
    
         
             
                uint32_t istepx2 = istep * 2;
         
     | 
| 
       351 
368 
     | 
    
         
             
                uint32_t jl      = vector_get_i(split_candidates, 0);
         
     | 
| 
       352 
     | 
    
         
            -
                VectorF *xsum    = state.xsum;
         
     | 
| 
       353 
     | 
    
         
            -
                VectorF *xsumsq  = state.xsumsq;
         
     | 
| 
       354 
     | 
    
         
            -
                MatrixI *splits  = state.splits;
         
     | 
| 
      
 369 
     | 
    
         
            +
                VectorF *const xsum    = state.xsum;
         
     | 
| 
      
 370 
     | 
    
         
            +
                VectorF *const xsumsq  = state.xsumsq;
         
     | 
| 
      
 371 
     | 
    
         
            +
                MatrixI *const splits  = state.splits;
         
     | 
| 
      
 372 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       355 
373 
     | 
    
         | 
| 
       356 
374 
     | 
    
         
             
                for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         
     | 
| 
       357 
375 
     | 
    
         
             
                    while (vector_get_i(split_candidates, r) < jl) r++;
         
     | 
| 
         @@ -360,7 +378,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       360 
378 
     | 
    
         
             
                    uint32_t cost_base_row = row - 1;
         
     | 
| 
       361 
379 
     | 
    
         
             
                    uint32_t cost_base_col = rcandidate - 1;
         
     | 
| 
       362 
380 
     | 
    
         
             
                    LDouble cost           =
         
     | 
| 
       363 
     | 
    
         
            -
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissimilarity(rcandidate, i, xsum, xsumsq);
     | 
| 
      
 381 
     | 
    
         
            +
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissim(rcandidate, i, xsum, xsumsq);
         
     | 
| 
       364 
382 
     | 
    
         | 
| 
       365 
383 
     | 
    
         
             
                    matrix_set_f(state.cost, row, i, cost);
         
     | 
| 
       366 
384 
     | 
    
         
             
                    matrix_set_i(state.splits, row, i, rcandidate);
         
     | 
| 
         @@ -371,7 +389,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       371 
389 
     | 
    
         
             
                        : vector_get_i(split_candidates, n - 1);
         
     | 
| 
       372 
390 
     | 
    
         | 
| 
       373 
391 
     | 
    
         
             
                    uint32_t jmax  = jh < i ? jh : i;
         
     | 
| 
       374 
     | 
    
         
            -
                    LDouble sjimin = dissimilarity(jmax, i, xsum, xsumsq);
     | 
| 
      
 392 
     | 
    
         
            +
                    LDouble sjimin = dissim(jmax, i, xsum, xsumsq);
         
     | 
| 
       375 
393 
     | 
    
         | 
| 
       376 
394 
     | 
    
         
             
                    for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
         
     | 
| 
       377 
395 
     | 
    
         
             
                        uint32_t jabs = vector_get_i(split_candidates, r);
         
     | 
| 
         @@ -380,7 +398,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       380 
398 
     | 
    
         
             
                        if (jabs < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       381 
399 
     | 
    
         | 
| 
       382 
400 
     | 
    
         
             
                        LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
         
     | 
| 
       383 
     | 
    
         
            -
                        LDouble sj        = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
     | 
| 
      
 401 
     | 
    
         
            +
                        LDouble sj        = cost_base + dissim(jabs, i, xsum, xsumsq);
         
     | 
| 
       384 
402 
     | 
    
         
             
                        LDouble cost_prev = matrix_get_f(state.cost, row, i);
         
     | 
| 
       385 
403 
     | 
    
         | 
| 
       386 
404 
     | 
    
         
             
                        if (sj <= cost_prev) {
         
     | 
| 
         @@ -396,7 +414,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       396 
414 
     | 
    
         
             
                }
         
     | 
| 
       397 
415 
     | 
    
         
             
            }
         
     | 
| 
       398 
416 
     | 
    
         | 
| 
       399 
     | 
    
         
            -
            void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 417 
     | 
    
         
            +
            inline void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       400 
418 
     | 
    
         
             
            {
         
     | 
| 
       401 
419 
     | 
    
         
             
                const uint32_t row    = rparams.row;
         
     | 
| 
       402 
420 
     | 
    
         
             
                const uint32_t imin   = rparams.imin;
         
     | 
| 
         @@ -404,6 +422,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       404 
422 
     | 
    
         
             
                const uint32_t istep  = rparams.istep;
         
     | 
| 
       405 
423 
     | 
    
         
             
                MatrixF *const cost   = state.cost;
         
     | 
| 
       406 
424 
     | 
    
         
             
                MatrixI *const splits = state.splits;
         
     | 
| 
      
 425 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       407 
426 
     | 
    
         | 
| 
       408 
427 
     | 
    
         
             
                uint32_t optimal_split_idx_prev = 0;
         
     | 
| 
       409 
428 
     | 
    
         | 
| 
         @@ -412,7 +431,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       412 
431 
     | 
    
         
             
                    const uint32_t optimal_split_idx = optimal_split_idx_prev;
         
     | 
| 
       413 
432 
     | 
    
         
             
                    const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         
     | 
| 
       414 
433 
     | 
    
         
             
                    const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
         
     | 
| 
       415 
     | 
    
         
            -
                    const LDouble added_cost         = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
     | 
| 
      
 434 
     | 
    
         
            +
                    const LDouble added_cost         = dissim(optimal_split, i, state.xsum, state.xsumsq);
         
     | 
| 
       416 
435 
     | 
    
         | 
| 
       417 
436 
     | 
    
         
             
                    matrix_set_f(cost, row, i, cost_prev + added_cost);
         
     | 
| 
       418 
437 
     | 
    
         
             
                    matrix_set_i(splits, row, i, optimal_split);
         
     | 
| 
         @@ -425,7 +444,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       425 
444 
     | 
    
         
             
                        if (split > i) break;
         
     | 
| 
       426 
445 
     | 
    
         | 
| 
       427 
446 
     | 
    
         
             
                        LDouble split_cost =
         
     | 
| 
       428 
     | 
    
         
            -
                            matrix_get_f(cost, row - 1, split - 1) + dissimilarity(split, i, state.xsum, state.xsumsq);
     | 
| 
      
 447 
     | 
    
         
            +
                            matrix_get_f(cost, row - 1, split - 1) + dissim(split, i, state.xsum, state.xsumsq);
         
     | 
| 
       429 
448 
     | 
    
         | 
| 
       430 
449 
     | 
    
         
             
                        if (split_cost > matrix_get_f(cost, row, i)) continue;
         
     | 
| 
       431 
450 
     | 
    
         | 
| 
         @@ -436,7 +455,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       436 
455 
     | 
    
         
             
                }
         
     | 
| 
       437 
456 
     | 
    
         
             
            }
         
     | 
| 
       438 
457 
     | 
    
         | 
| 
       439 
     | 
    
         
            -
            VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 458 
     | 
    
         
            +
            inline VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       440 
459 
     | 
    
         
             
            {
         
     | 
| 
       441 
460 
     | 
    
         
             
                uint32_t imin  = rparams.imin;
         
     | 
| 
       442 
461 
     | 
    
         
             
                uint32_t row   = rparams.row;
         
     | 
| 
         @@ -449,6 +468,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       449 
468 
     | 
    
         
             
                uint32_t left   = 0;
         
     | 
| 
       450 
469 
     | 
    
         
             
                uint32_t right  = 0;
         
     | 
| 
       451 
470 
     | 
    
         
             
                VectorI *pruned = vector_dup_i(split_candidates, state.arena);
         
     | 
| 
      
 471 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       452 
472 
     | 
    
         | 
| 
       453 
473 
     | 
    
         
             
                while (m > n)
         
     | 
| 
       454 
474 
     | 
    
         
             
                {
         
     | 
| 
         @@ -456,9 +476,9 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       456 
476 
     | 
    
         
             
                    uint32_t j     = vector_get_i(pruned, right);
         
     | 
| 
       457 
477 
     | 
    
         
             
                    uint32_t jnext = vector_get_i(pruned, right + 1);
         
     | 
| 
       458 
478 
     | 
    
         
             
                    LDouble sl     =
         
     | 
| 
       459 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
     | 
| 
      
 479 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, j - 1) + dissim(j, i, state.xsum, state.xsumsq);
         
     | 
| 
       460 
480 
     | 
    
         
             
                    LDouble snext  =
         
     | 
| 
       461 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
     | 
| 
      
 481 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissim(jnext, i, state.xsum, state.xsumsq);
         
     | 
| 
       462 
482 
     | 
    
         | 
| 
       463 
483 
     | 
    
         
             
                    if ((sl < snext) && (left < n - 1)) {
         
     | 
| 
       464 
484 
     | 
    
         
             
                        vector_set_i(pruned, left, j);
         
     | 
| 
         @@ -488,7 +508,8 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       488 
508 
     | 
    
         
             
                return pruned;
         
     | 
| 
       489 
509 
     | 
    
         
             
            }
         
     | 
| 
       490 
510 
     | 
    
         | 
| 
       491 
     | 
    
         
            -
             
     | 
| 
      
 511 
     | 
    
         
            +
            /* L2 aka Euclidean aka Mean dissimilarity criteria */
         
     | 
| 
      
 512 
     | 
    
         
            +
            inline LDouble dissimilarity_l2(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
         
     | 
| 
       492 
513 
     | 
    
         
             
                LDouble sji = 0.0;
         
     | 
| 
       493 
514 
     | 
    
         | 
| 
       494 
515 
     | 
    
         
             
                if (j >= i) return sji;
         
     | 
| 
         @@ -505,6 +526,48 @@ inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, Vec 
     | 
|
| 
       505 
526 
     | 
    
         
             
                return (sji > 0) ? sji : 0.0;
         
     | 
| 
       506 
527 
     | 
    
         
             
            }
         
     | 
| 
       507 
528 
     | 
    
         | 
| 
      
 529 
     | 
    
         
            +
            /* L1 aka Manhattan aka Median dissimilarity criteria */
         
     | 
| 
      
 530 
     | 
    
         
            +
            inline LDouble dissimilarity_l1(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict _xsumsq)
         
     | 
| 
      
 531 
     | 
    
         
            +
            {
         
     | 
| 
      
 532 
     | 
    
         
            +
                LDouble sji = 0.0;
         
     | 
| 
      
 533 
     | 
    
         
            +
             
     | 
| 
      
 534 
     | 
    
         
            +
                if (j >= i) return sji;
         
     | 
| 
      
 535 
     | 
    
         
            +
             
     | 
| 
      
 536 
     | 
    
         
            +
                if (j > 0) {
         
     | 
| 
      
 537 
     | 
    
         
            +
                    uint32_t median_idx = (i + j) >> 1;
         
     | 
| 
      
 538 
     | 
    
         
            +
             
     | 
| 
      
 539 
     | 
    
         
            +
                    if (((i - j + 1) % 2) == 1) {
         
     | 
| 
      
 540 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 541 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 542 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 543 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 544 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 545 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 546 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 547 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 548 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 549 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 550 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 551 
     | 
    
         
            +
                    }
         
     | 
| 
      
 552 
     | 
    
         
            +
                } else { // j == 0
         
     | 
| 
      
 553 
     | 
    
         
            +
                    uint32_t median_idx = i >> 1;
         
     | 
| 
      
 554 
     | 
    
         
            +
             
     | 
| 
      
 555 
     | 
    
         
            +
                    if (((i + 1) % 2) == 1) {
         
     | 
| 
      
 556 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 557 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 558 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 559 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 560 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 561 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 562 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 563 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 564 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 565 
     | 
    
         
            +
                    }
         
     | 
| 
      
 566 
     | 
    
         
            +
                }
         
     | 
| 
      
 567 
     | 
    
         
            +
             
     | 
| 
      
 568 
     | 
    
         
            +
                return (sji < 0) ? 0.0 : sji;
         
     | 
| 
      
 569 
     | 
    
         
            +
            }
         
     | 
| 
      
 570 
     | 
    
         
            +
             
     | 
| 
       508 
571 
     | 
    
         
             
            inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
         
     | 
| 
       509 
572 
     | 
    
         
             
                VectorF *v;
         
     | 
| 
       510 
573 
     | 
    
         | 
    
        data/lib/ckmeans/clusterer.rb
    CHANGED
    
    
    
        data/lib/ckmeans/version.rb
    CHANGED
    
    
    
        data/lib/ckmeans.rb
    CHANGED
    
    
| 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Ckmedian
              # Splits a list of entries into contiguous groups of the sorted input.
              #
              # The group sizes come from +sorted_group_sizes+, which is not defined in
              # this file (NOTE(review): presumably provided by the native extension --
              # confirm).
              class Clusterer
                # entries - collection to cluster; must respond to size, uniq and sort,
                #           and its elements to to_f.
                # kmin    - minimum number of clusters.
                # kmax    - maximum number of clusters (defaults to kmin); capped at the
                #           number of distinct entries.
                #
                # Raises ArgumentError when kmin or kmax exceeds the number of entries.
                def initialize(entries, kmin, kmax = kmin)
                  @xcount = entries.size

                  if kmin > @xcount
                    raise ArgumentError, "Minimum cluster count is bigger than element count"
                  end

                  if kmax > @xcount
                    raise ArgumentError, "Maximum cluster count is bigger than element count"
                  end

                  distinct_total = entries.uniq.size
                  ordered        = entries.sort

                  @kmin             = kmin
                  @unique_xcount    = distinct_total
                  @kmax             = kmax < distinct_total ? kmax : distinct_total
                  @xsorted_original = ordered
                  # Float copy of the sorted entries; not read anywhere in this file.
                  @xsorted          = ordered.map { |entry| entry.to_f }
                end

                # Returns the clusters as an array of arrays of the original entries, in
                # sorted order. The result is memoized. Building it consumes
                # @xsorted_original via Array#shift.
                def clusters
                  @clusters ||=
                    if @unique_xcount > 1
                      sorted_group_sizes.map { |group_size| @xsorted_original.shift(group_size) }
                    else
                      # Zero or one distinct value: everything belongs to a single cluster.
                      [@xsorted_original]
                    end
                end
              end
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ckmeans
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.1.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Vlad Lebedev
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire:
         
     | 
| 
       8 
9 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       9 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       10 
     | 
    
         
            -
            date: 2025-05- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2025-05-23 00:00:00.000000000 Z
         
     | 
| 
       11 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       12 
13 
     | 
    
         
             
            description: Repeatable clustering of unidimensional data
         
     | 
| 
       13 
14 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -32,6 +33,7 @@ files: 
     | 
|
| 
       32 
33 
     | 
    
         
             
            - lib/ckmeans.rb
         
     | 
| 
       33 
34 
     | 
    
         
             
            - lib/ckmeans/clusterer.rb
         
     | 
| 
       34 
35 
     | 
    
         
             
            - lib/ckmeans/version.rb
         
     | 
| 
      
 36 
     | 
    
         
            +
            - lib/ckmedian/clusterer.rb
         
     | 
| 
       35 
37 
     | 
    
         
             
            - sig/ckmeans.rbs
         
     | 
| 
       36 
38 
     | 
    
         
             
            homepage: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       37 
39 
     | 
    
         
             
            licenses:
         
     | 
| 
         @@ -41,6 +43,7 @@ metadata: 
     | 
|
| 
       41 
43 
     | 
    
         
             
              homepage_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       42 
44 
     | 
    
         
             
              source_code_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       43 
45 
     | 
    
         
             
              changelog_uri: https://github.com/vlebedeff/rb-ckmeans/blob/main/CHANGELOG.md
         
     | 
| 
      
 46 
     | 
    
         
            +
            post_install_message:
         
     | 
| 
       44 
47 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       45 
48 
     | 
    
         
             
            require_paths:
         
     | 
| 
       46 
49 
     | 
    
         
             
            - lib
         
     | 
| 
         @@ -55,7 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       55 
58 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       56 
59 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       57 
60 
     | 
    
         
             
            requirements: []
         
     | 
| 
       58 
     | 
    
         
            -
            rubygems_version: 3. 
     | 
| 
      
 61 
     | 
    
         
            +
            rubygems_version: 3.4.19
         
     | 
| 
      
 62 
     | 
    
         
            +
            signing_key:
         
     | 
| 
       59 
63 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       60 
64 
     | 
    
         
             
            summary: Ruby implementation of Ckmeans.1d.dp
         
     | 
| 
       61 
65 
     | 
    
         
             
            test_files: []
         
     |