ckmeans 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.ruby-version +1 -1
 - data/README.md +10 -3
 - data/ext/ckmeans/extensions.c +87 -20
 - data/lib/ckmeans/clusterer.rb +0 -2
 - data/lib/ckmeans/version.rb +1 -1
 - data/lib/ckmeans.rb +2 -0
 - data/lib/ckmedian/clusterer.rb +29 -0
 - metadata +7 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 508f78311a643e1fa8e693e4abf1cdf6df4eb06ff09756fa534ff4a514d0f34f
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 4ef313387c2e45df4a8afde58e429093023a555a32f4af395a8b79c048a9d98d
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: ae0f1aff4bd6a78da04123d3728234012d0692ec22396b9529b245c8fa473343314508f053ee02ac876131b243704316948f476840d3c495d4e72eba68e095fd
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: ab95cfdacac4d9204887d4d5c5a7b85aafa3c869ec4b7a851ae994d8f15ddf096e99cb2d1691e6120a7a6bbe51cbde0524c93ad09b5811b7561c43c60a49256c
         
     | 
    
        data/.ruby-version
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            3.2. 
     | 
| 
      
 1 
     | 
    
         
            +
            3.2.8
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -19,9 +19,16 @@ gem install ckmeans 
     | 
|
| 
       19 
19 
     | 
    
         
             
            ## Usage
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
       21 
21 
     | 
    
         
             
            ```rb
         
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
            Ckmeans::Clusterer(data, kmin 
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
      
 22 
     | 
    
         
            +
            # Fixed cluster count
         
     | 
| 
      
 23 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin).clusters
         
     | 
| 
      
 24 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin).clusters
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            # Estimate optimal cluster count within kmin and kmax
         
     | 
| 
      
 27 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 28 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            # Adjust the Bayesian Information Criterion to favor more, smaller clusters (Ckmeans only)
         
     | 
| 
      
 31 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax, :sensitive).clusters
         
     | 
| 
       25 
32 
     | 
    
         
             
            ```
         
     | 
| 
       26 
33 
     | 
    
         | 
| 
       27 
34 
     | 
    
         
             
            ## License
         
     | 
    
        data/ext/ckmeans/extensions.c
    CHANGED
    
    | 
         @@ -33,6 +33,8 @@ typedef struct VectorI { 
     | 
|
| 
       33 
33 
     | 
    
         
             
                uint32_t *values;
         
     | 
| 
       34 
34 
     | 
    
         
             
            } VectorI;
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
      
 36 
     | 
    
         
            +
            /*
             * Function type of a dissimilarity criterion. Both dissimilarity_l2 and
             * dissimilarity_l1 are declared with this signature, a pointer to one of
             * them is stored in State.dissim, and call sites invoke it as
             * dissim(j, i, xsum, xsumsq).
             */
            typedef LDouble (FnDissim)(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
       36 
38 
     | 
    
         
             
            typedef struct State {
         
     | 
| 
       37 
39 
     | 
    
         
             
                uint32_t xcount;
         
     | 
| 
       38 
40 
     | 
    
         
             
                uint32_t kmin;
         
     | 
| 
         @@ -44,6 +46,7 @@ typedef struct State { 
     | 
|
| 
       44 
46 
     | 
    
         
             
                MatrixI *splits;
         
     | 
| 
       45 
47 
     | 
    
         
             
                VectorF *xsum;
         
     | 
| 
       46 
48 
     | 
    
         
             
                VectorF *xsumsq;
         
     | 
| 
      
 49 
     | 
    
         
            +
                FnDissim *dissim;
         
     | 
| 
       47 
50 
     | 
    
         
             
            } State;
         
     | 
| 
       48 
51 
     | 
    
         | 
| 
       49 
52 
     | 
    
         
             
            typedef struct RowParams {
         
     | 
| 
         @@ -59,6 +62,8 @@ typedef struct { 
     | 
|
| 
       59 
62 
     | 
    
         
             
            } SegmentStats;
         
     | 
| 
       60 
63 
     | 
    
         | 
| 
       61 
64 
     | 
    
         
             
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 65 
     | 
    
         
            +
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 66 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim*);
         
     | 
| 
       62 
67 
     | 
    
         | 
| 
       63 
68 
     | 
    
         
             
            Arena *arena_create(size_t);
         
     | 
| 
       64 
69 
     | 
    
         
             
            void  *arena_alloc(Arena*, size_t);
         
     | 
| 
         @@ -85,7 +90,8 @@ uint32_t vector_get_i(VectorI*, uint32_t offset); 
     | 
|
| 
       85 
90 
     | 
    
         
             
            void     vector_downsize_i(VectorI*, uint32_t);
         
     | 
| 
       86 
91 
     | 
    
         
             
            void     vector_inspect_i(VectorI*);
         
     | 
| 
       87 
92 
     | 
    
         | 
| 
       88 
     | 
    
         
            -
            LDouble       
     | 
| 
      
 93 
     | 
    
         
            +
            LDouble      dissimilarity_l2(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 94 
     | 
    
         
            +
            LDouble      dissimilarity_l1(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
       89 
95 
     | 
    
         
             
            void         fill_row(State, uint32_t, uint32_t, uint32_t);
         
     | 
| 
       90 
96 
     | 
    
         
             
            void         smawk(State, RowParams, VectorI*);
         
     | 
| 
       91 
97 
     | 
    
         
             
            void         find_min_from_candidates(State, RowParams, VectorI*);
         
     | 
| 
         @@ -95,11 +101,15 @@ SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t); 
     | 
|
| 
       95 
101 
     | 
    
         
             
            VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
         
     | 
| 
       96 
102 
     | 
    
         
             
            uint32_t     find_koptimal(State);
         
     | 
| 
       97 
103 
     | 
    
         | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
       98 
105 
     | 
    
         
             
            void Init_extensions(void) {
         
     | 
| 
       99 
     | 
    
         
            -
                VALUE ckmeans_module 
     | 
| 
       100 
     | 
    
         
            -
                VALUE  
     | 
| 
      
 106 
     | 
    
         
            +
                VALUE ckmeans_module     = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
         
     | 
| 
      
 107 
     | 
    
         
            +
                VALUE ckmedian_module    = rb_const_get(rb_cObject, rb_intern("Ckmedian"));
         
     | 
| 
      
 108 
     | 
    
         
            +
                VALUE ckmeans_clusterer  = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
         
     | 
| 
      
 109 
     | 
    
         
            +
                VALUE ckmedian_clusterer = rb_const_get(ckmedian_module, rb_intern("Clusterer"));
         
     | 
| 
       101 
110 
     | 
    
         | 
| 
       102 
     | 
    
         
            -
                rb_define_private_method( 
     | 
| 
      
 111 
     | 
    
         
            +
                rb_define_private_method(ckmeans_clusterer, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
         
     | 
| 
      
 112 
     | 
    
         
            +
                rb_define_private_method(ckmedian_clusterer, "sorted_group_sizes", rb_ckmedian_sorted_group_sizes, 0);
         
     | 
| 
       103 
113 
     | 
    
         
             
            }
         
     | 
| 
       104 
114 
     | 
    
         | 
| 
       105 
115 
     | 
    
         
             
            # define ARENA_MIN_CAPACITY 100
         
     | 
| 
         @@ -107,6 +117,16 @@ void Init_extensions(void) { 
     | 
|
| 
       107 
117 
     | 
    
         
             
            # define PIx2 (M_PI * 2.0)
         
     | 
| 
       108 
118 
     | 
    
         | 
| 
       109 
119 
     | 
    
         
             
            /*
             * Native body of the private `sorted_group_sizes` method that
             * Init_extensions registers on the Ckmeans Clusterer.
             *
             * Delegates to rb_sorted_group_sizes with the L2 dissimilarity criterion
             * and returns whatever that call returns.
             */
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
            {
                FnDissim *const l2_criterion = dissimilarity_l2;

                return rb_sorted_group_sizes(self, l2_criterion);
            }
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            /*
             * Native body of the private `sorted_group_sizes` method that
             * Init_extensions registers on the Ckmedian Clusterer.
             *
             * Delegates to rb_sorted_group_sizes with the L1 dissimilarity criterion
             * and returns whatever that call returns.
             */
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self)
            {
                FnDissim *const l1_criterion = dissimilarity_l1;

                return rb_sorted_group_sizes(self, l1_criterion);
            }
         
     | 
| 
      
 128 
     | 
    
         
            +
             
     | 
| 
      
 129 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
         
     | 
| 
       110 
130 
     | 
    
         
             
            {
         
     | 
| 
       111 
131 
     | 
    
         
             
                uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
         
     | 
| 
       112 
132 
     | 
    
         
             
                uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
         
     | 
| 
         @@ -139,7 +159,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       139 
159 
     | 
    
         
             
                    .cost            = cost,
         
     | 
| 
       140 
160 
     | 
    
         
             
                    .splits          = splits,
         
     | 
| 
       141 
161 
     | 
    
         
             
                    .xsum            = xsum,
         
     | 
| 
       142 
     | 
    
         
            -
                    .xsumsq          = xsumsq
         
     | 
| 
      
 162 
     | 
    
         
            +
                    .xsumsq          = xsumsq,
         
     | 
| 
      
 163 
     | 
    
         
            +
                    .dissim          = criteria
         
     | 
| 
       143 
164 
     | 
    
         
             
                };
         
     | 
| 
       144 
165 
     | 
    
         | 
| 
       145 
166 
     | 
    
         | 
| 
         @@ -157,7 +178,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       157 
178 
     | 
    
         | 
| 
       158 
179 
     | 
    
         
             
                    vector_set_f(xsum, i, xsum_prev + diff);
         
     | 
| 
       159 
180 
     | 
    
         
             
                    vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
         
     | 
| 
       160 
     | 
    
         
            -
                    matrix_set_f(cost, 0, i,  
     | 
| 
      
 181 
     | 
    
         
            +
                    matrix_set_f(cost, 0, i, criteria(0, i, xsum, xsumsq));
         
     | 
| 
       161 
182 
     | 
    
         
             
                    matrix_set_i(splits, 0, i, 0);
         
     | 
| 
       162 
183 
     | 
    
         
             
                }
         
     | 
| 
       163 
184 
     | 
    
         | 
| 
         @@ -336,7 +357,7 @@ void smawk(State state, RowParams rparams, VectorI *split_candidates) 
     | 
|
| 
       336 
357 
     | 
    
         
             
                }
         
     | 
| 
       337 
358 
     | 
    
         
             
            }
         
     | 
| 
       338 
359 
     | 
    
         | 
| 
       339 
     | 
    
         
            -
            void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 360 
     | 
    
         
            +
            inline void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       340 
361 
     | 
    
         
             
            {
         
     | 
| 
       341 
362 
     | 
    
         
             
                uint32_t row     = rparams.row;
         
     | 
| 
       342 
363 
     | 
    
         
             
                uint32_t imin    = rparams.imin;
         
     | 
| 
         @@ -345,9 +366,10 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       345 
366 
     | 
    
         
             
                uint32_t n       = split_candidates->size;
         
     | 
| 
       346 
367 
     | 
    
         
             
                uint32_t istepx2 = istep * 2;
         
     | 
| 
       347 
368 
     | 
    
         
             
                uint32_t jl      = vector_get_i(split_candidates, 0);
         
     | 
| 
       348 
     | 
    
         
            -
                VectorF *xsum    = state.xsum;
         
     | 
| 
       349 
     | 
    
         
            -
                VectorF *xsumsq  = state.xsumsq;
         
     | 
| 
       350 
     | 
    
         
            -
                MatrixI *splits  = state.splits;
         
     | 
| 
      
 369 
     | 
    
         
            +
                VectorF *const xsum    = state.xsum;
         
     | 
| 
      
 370 
     | 
    
         
            +
                VectorF *const xsumsq  = state.xsumsq;
         
     | 
| 
      
 371 
     | 
    
         
            +
                MatrixI *const splits  = state.splits;
         
     | 
| 
      
 372 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       351 
373 
     | 
    
         | 
| 
       352 
374 
     | 
    
         
             
                for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         
     | 
| 
       353 
375 
     | 
    
         
             
                    while (vector_get_i(split_candidates, r) < jl) r++;
         
     | 
| 
         @@ -356,7 +378,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       356 
378 
     | 
    
         
             
                    uint32_t cost_base_row = row - 1;
         
     | 
| 
       357 
379 
     | 
    
         
             
                    uint32_t cost_base_col = rcandidate - 1;
         
     | 
| 
       358 
380 
     | 
    
         
             
                    LDouble cost           =
         
     | 
| 
       359 
     | 
    
         
            -
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) +  
     | 
| 
      
 381 
     | 
    
         
            +
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissim(rcandidate, i, xsum, xsumsq);
         
     | 
| 
       360 
382 
     | 
    
         | 
| 
       361 
383 
     | 
    
         
             
                    matrix_set_f(state.cost, row, i, cost);
         
     | 
| 
       362 
384 
     | 
    
         
             
                    matrix_set_i(state.splits, row, i, rcandidate);
         
     | 
| 
         @@ -367,7 +389,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       367 
389 
     | 
    
         
             
                        : vector_get_i(split_candidates, n - 1);
         
     | 
| 
       368 
390 
     | 
    
         | 
| 
       369 
391 
     | 
    
         
             
                    uint32_t jmax  = jh < i ? jh : i;
         
     | 
| 
       370 
     | 
    
         
            -
                    LDouble sjimin =  
     | 
| 
      
 392 
     | 
    
         
            +
                    LDouble sjimin = dissim(jmax, i, xsum, xsumsq);
         
     | 
| 
       371 
393 
     | 
    
         | 
| 
       372 
394 
     | 
    
         
             
                    for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
         
     | 
| 
       373 
395 
     | 
    
         
             
                        uint32_t jabs = vector_get_i(split_candidates, r);
         
     | 
| 
         @@ -376,7 +398,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       376 
398 
     | 
    
         
             
                        if (jabs < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       377 
399 
     | 
    
         | 
| 
       378 
400 
     | 
    
         
             
                        LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
         
     | 
| 
       379 
     | 
    
         
            -
                        LDouble sj        = cost_base +  
     | 
| 
      
 401 
     | 
    
         
            +
                        LDouble sj        = cost_base + dissim(jabs, i, xsum, xsumsq);
         
     | 
| 
       380 
402 
     | 
    
         
             
                        LDouble cost_prev = matrix_get_f(state.cost, row, i);
         
     | 
| 
       381 
403 
     | 
    
         | 
| 
       382 
404 
     | 
    
         
             
                        if (sj <= cost_prev) {
         
     | 
| 
         @@ -392,7 +414,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       392 
414 
     | 
    
         
             
                }
         
     | 
| 
       393 
415 
     | 
    
         
             
            }
         
     | 
| 
       394 
416 
     | 
    
         | 
| 
       395 
     | 
    
         
            -
            void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 417 
     | 
    
         
            +
            inline void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       396 
418 
     | 
    
         
             
            {
         
     | 
| 
       397 
419 
     | 
    
         
             
                const uint32_t row    = rparams.row;
         
     | 
| 
       398 
420 
     | 
    
         
             
                const uint32_t imin   = rparams.imin;
         
     | 
| 
         @@ -400,6 +422,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       400 
422 
     | 
    
         
             
                const uint32_t istep  = rparams.istep;
         
     | 
| 
       401 
423 
     | 
    
         
             
                MatrixF *const cost   = state.cost;
         
     | 
| 
       402 
424 
     | 
    
         
             
                MatrixI *const splits = state.splits;
         
     | 
| 
      
 425 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       403 
426 
     | 
    
         | 
| 
       404 
427 
     | 
    
         
             
                uint32_t optimal_split_idx_prev = 0;
         
     | 
| 
       405 
428 
     | 
    
         | 
| 
         @@ -408,7 +431,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       408 
431 
     | 
    
         
             
                    const uint32_t optimal_split_idx = optimal_split_idx_prev;
         
     | 
| 
       409 
432 
     | 
    
         
             
                    const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         
     | 
| 
       410 
433 
     | 
    
         
             
                    const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
         
     | 
| 
       411 
     | 
    
         
            -
                    const LDouble added_cost         =  
     | 
| 
      
 434 
     | 
    
         
            +
                    const LDouble added_cost         = dissim(optimal_split, i, state.xsum, state.xsumsq);
         
     | 
| 
       412 
435 
     | 
    
         | 
| 
       413 
436 
     | 
    
         
             
                    matrix_set_f(cost, row, i, cost_prev + added_cost);
         
     | 
| 
       414 
437 
     | 
    
         
             
                    matrix_set_i(splits, row, i, optimal_split);
         
     | 
| 
         @@ -421,7 +444,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       421 
444 
     | 
    
         
             
                        if (split > i) break;
         
     | 
| 
       422 
445 
     | 
    
         | 
| 
       423 
446 
     | 
    
         
             
                        LDouble split_cost =
         
     | 
| 
       424 
     | 
    
         
            -
                            matrix_get_f(cost, row - 1, split - 1) +  
     | 
| 
      
 447 
     | 
    
         
            +
                            matrix_get_f(cost, row - 1, split - 1) + dissim(split, i, state.xsum, state.xsumsq);
         
     | 
| 
       425 
448 
     | 
    
         | 
| 
       426 
449 
     | 
    
         
             
                        if (split_cost > matrix_get_f(cost, row, i)) continue;
         
     | 
| 
       427 
450 
     | 
    
         | 
| 
         @@ -432,7 +455,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       432 
455 
     | 
    
         
             
                }
         
     | 
| 
       433 
456 
     | 
    
         
             
            }
         
     | 
| 
       434 
457 
     | 
    
         | 
| 
       435 
     | 
    
         
            -
            VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 458 
     | 
    
         
            +
            inline VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       436 
459 
     | 
    
         
             
            {
         
     | 
| 
       437 
460 
     | 
    
         
             
                uint32_t imin  = rparams.imin;
         
     | 
| 
       438 
461 
     | 
    
         
             
                uint32_t row   = rparams.row;
         
     | 
| 
         @@ -445,6 +468,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       445 
468 
     | 
    
         
             
                uint32_t left   = 0;
         
     | 
| 
       446 
469 
     | 
    
         
             
                uint32_t right  = 0;
         
     | 
| 
       447 
470 
     | 
    
         
             
                VectorI *pruned = vector_dup_i(split_candidates, state.arena);
         
     | 
| 
      
 471 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       448 
472 
     | 
    
         | 
| 
       449 
473 
     | 
    
         
             
                while (m > n)
         
     | 
| 
       450 
474 
     | 
    
         
             
                {
         
     | 
| 
         @@ -452,9 +476,9 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       452 
476 
     | 
    
         
             
                    uint32_t j     = vector_get_i(pruned, right);
         
     | 
| 
       453 
477 
     | 
    
         
             
                    uint32_t jnext = vector_get_i(pruned, right + 1);
         
     | 
| 
       454 
478 
     | 
    
         
             
                    LDouble sl     =
         
     | 
| 
       455 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, j - 1) +  
     | 
| 
      
 479 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, j - 1) + dissim(j, i, state.xsum, state.xsumsq);
         
     | 
| 
       456 
480 
     | 
    
         
             
                    LDouble snext  =
         
     | 
| 
       457 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, jnext - 1) +  
     | 
| 
      
 481 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissim(jnext, i, state.xsum, state.xsumsq);
         
     | 
| 
       458 
482 
     | 
    
         | 
| 
       459 
483 
     | 
    
         
             
                    if ((sl < snext) && (left < n - 1)) {
         
     | 
| 
       460 
484 
     | 
    
         
             
                        vector_set_i(pruned, left, j);
         
     | 
| 
         @@ -484,7 +508,8 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       484 
508 
     | 
    
         
             
                return pruned;
         
     | 
| 
       485 
509 
     | 
    
         
             
            }
         
     | 
| 
       486 
510 
     | 
    
         | 
| 
       487 
     | 
    
         
            -
             
     | 
| 
      
 511 
     | 
    
         
            +
            /* L2 (Euclidean / mean) dissimilarity criterion */
         
     | 
| 
      
 512 
     | 
    
         
            +
            inline LDouble dissimilarity_l2(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
         
     | 
| 
       488 
513 
     | 
    
         
             
                LDouble sji = 0.0;
         
     | 
| 
       489 
514 
     | 
    
         | 
| 
       490 
515 
     | 
    
         
             
                if (j >= i) return sji;
         
     | 
| 
         @@ -501,6 +526,48 @@ inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, Vec 
     | 
|
| 
       501 
526 
     | 
    
         
             
                return (sji > 0) ? sji : 0.0;
         
     | 
| 
       502 
527 
     | 
    
         
             
            }
         
     | 
| 
       503 
528 
     | 
    
         | 
| 
      
 529 
     | 
    
         
            +
            /* L1 aka Manhattan aka Median dissimilarity criteria */
         
     | 
| 
      
 530 
     | 
    
         
            +
            inline LDouble dissimilarity_l1(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict _xsumsq)
         
     | 
| 
      
 531 
     | 
    
         
            +
            {
         
     | 
| 
      
 532 
     | 
    
         
            +
                LDouble sji = 0.0;
         
     | 
| 
      
 533 
     | 
    
         
            +
             
     | 
| 
      
 534 
     | 
    
         
            +
                if (j >= i) return sji;
         
     | 
| 
      
 535 
     | 
    
         
            +
             
     | 
| 
      
 536 
     | 
    
         
            +
                if (j > 0) {
         
     | 
| 
      
 537 
     | 
    
         
            +
                    uint32_t median_idx = (i + j) >> 1;
         
     | 
| 
      
 538 
     | 
    
         
            +
             
     | 
| 
      
 539 
     | 
    
         
            +
                    if (((i - j + 1) % 2) == 1) {
         
     | 
| 
      
 540 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 541 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 542 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 543 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 544 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 545 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 546 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 547 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 548 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 549 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 550 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 551 
     | 
    
         
            +
                    }
         
     | 
| 
      
 552 
     | 
    
         
            +
                } else { // j == 0
         
     | 
| 
      
 553 
     | 
    
         
            +
                    uint32_t median_idx = i >> 1;
         
     | 
| 
      
 554 
     | 
    
         
            +
             
     | 
| 
      
 555 
     | 
    
         
            +
                    if (((i + 1) % 2) == 1) {
         
     | 
| 
      
 556 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 557 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 558 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 559 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 560 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 561 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 562 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 563 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 564 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 565 
     | 
    
         
            +
                    }
         
     | 
| 
      
 566 
     | 
    
         
            +
                }
         
     | 
| 
      
 567 
     | 
    
         
            +
             
     | 
| 
      
 568 
     | 
    
         
            +
                return (sji < 0) ? 0.0 : sji;
         
     | 
| 
      
 569 
     | 
    
         
            +
            }
         
     | 
| 
      
 570 
     | 
    
         
            +
             
     | 
| 
       504 
571 
     | 
    
         
             
            inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
         
     | 
| 
       505 
572 
     | 
    
         
             
                VectorF *v;
         
     | 
| 
       506 
573 
     | 
    
         | 
    
        data/lib/ckmeans/clusterer.rb
    CHANGED
    
    
    
        data/lib/ckmeans/version.rb
    CHANGED
    
    
    
        data/lib/ckmeans.rb
    CHANGED
    
    
| 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Ckmedian
              # Splits one-dimensional data into contiguous groups of the sorted input.
              #
              # The group sizes come from #sorted_group_sizes, which is not defined in
              # this file (presumably supplied by the gem's native extension -- confirm).
              class Clusterer
                # @param entries [Array] values to cluster; the collection must respond to
                #   #size, #uniq and #sort, and every element to #to_f
                # @param kmin [Integer] minimum cluster count
                # @param kmax [Integer] maximum cluster count (defaults to kmin)
                # @raise [ArgumentError] if kmin or kmax is bigger than the element count
                def initialize(entries, kmin, kmax = kmin)
                  @xcount = entries.size

                  raise ArgumentError, "Minimum cluster count is bigger than element count" if kmin > @xcount
                  raise ArgumentError, "Maximum cluster count is bigger than element count" if kmax > @xcount

                  @kmin             = kmin
                  @unique_xcount    = entries.uniq.size
                  # Never ask for more clusters than there are distinct values.
                  @kmax             = [@unique_xcount, kmax].min
                  # Sorted copy of the caller's values, kept so the clusters can be
                  # returned with the original (unconverted) elements.
                  @xsorted_original = entries.sort
                  # Float view of the same sorted values.
                  @xsorted          = @xsorted_original.map(&:to_f)
                end

                # Returns the clusters as an array of arrays, memoized after the first call.
                #
                # With at most one distinct value everything lands in a single cluster.
                # Otherwise the sorted input is cut into consecutive runs whose lengths
                # are given by #sorted_group_sizes.
                #
                # @return [Array<Array>] consecutive slices of the sorted input
                def clusters
                  @clusters ||=
                    if @unique_xcount <= 1
                      [@xsorted_original]
                    else
                      # Slice by running offset instead of shifting elements off
                      # @xsorted_original, so the stored input is not consumed.
                      offset = 0
                      sorted_group_sizes.map do |size|
                        group = @xsorted_original[offset, size]
                        offset += size
                        group
                      end
                    end
                end
              end
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ckmeans
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0.4
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.1.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Vlad Lebedev
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire:
         
     | 
| 
       8 
9 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       9 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       10 
     | 
    
         
            -
            date: 2025-05- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2025-05-23 00:00:00.000000000 Z
         
     | 
| 
       11 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       12 
13 
     | 
    
         
             
            description: Repeatable clustering of unidimensional data
         
     | 
| 
       13 
14 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -32,6 +33,7 @@ files: 
     | 
|
| 
       32 
33 
     | 
    
         
             
            - lib/ckmeans.rb
         
     | 
| 
       33 
34 
     | 
    
         
             
            - lib/ckmeans/clusterer.rb
         
     | 
| 
       34 
35 
     | 
    
         
             
            - lib/ckmeans/version.rb
         
     | 
| 
      
 36 
     | 
    
         
            +
            - lib/ckmedian/clusterer.rb
         
     | 
| 
       35 
37 
     | 
    
         
             
            - sig/ckmeans.rbs
         
     | 
| 
       36 
38 
     | 
    
         
             
            homepage: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       37 
39 
     | 
    
         
             
            licenses:
         
     | 
| 
         @@ -41,6 +43,7 @@ metadata: 
     | 
|
| 
       41 
43 
     | 
    
         
             
              homepage_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       42 
44 
     | 
    
         
             
              source_code_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       43 
45 
     | 
    
         
             
              changelog_uri: https://github.com/vlebedeff/rb-ckmeans/blob/main/CHANGELOG.md
         
     | 
| 
      
 46 
     | 
    
         
            +
            post_install_message:
         
     | 
| 
       44 
47 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       45 
48 
     | 
    
         
             
            require_paths:
         
     | 
| 
       46 
49 
     | 
    
         
             
            - lib
         
     | 
| 
         @@ -55,7 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       55 
58 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       56 
59 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       57 
60 
     | 
    
         
             
            requirements: []
         
     | 
| 
       58 
     | 
    
         
            -
            rubygems_version: 3. 
     | 
| 
      
 61 
     | 
    
         
            +
            rubygems_version: 3.4.19
         
     | 
| 
      
 62 
     | 
    
         
            +
            signing_key:
         
     | 
| 
       59 
63 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       60 
64 
     | 
    
         
             
            summary: Ruby implementation of Ckmeans.1d.dp
         
     | 
| 
       61 
65 
     | 
    
         
             
            test_files: []
         
     |