ckmeans 1.0.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.ruby-version +1 -1
 - data/README.md +24 -3
 - data/ext/ckmeans/extensions.c +208 -49
 - data/lib/ckmeans/clusterer.rb +7 -9
 - data/lib/ckmeans/version.rb +1 -1
 - data/lib/ckmeans.rb +2 -0
 - data/lib/ckmedian/clusterer.rb +29 -0
 - metadata +7 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 1d63d8f65d386bf27082e0a65b1ea82a7d150394b1424ab5c2c274e139f91482
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 1f3c4e91fcc9f3bda3d83521cac164ff83e3e5095705cd15420c6278635fc266
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 0101cd5f6d5ba925d8f37cc73416008ace4ffce7ea33a437e0189549ede4cbc23b7284de2fe28af181ddf08396b74225c67626e94ce015d54ac14fde17b53bda
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: abbcc012e9378ea1fbf15566fd47691bd4cecaaeaf95947c45414dfb7b304db87d803120749aab3ccbf806ab90dd554cce2461f340c348e4f1b820f47be421a2
         
     | 
    
        data/.ruby-version
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            3.2. 
     | 
| 
      
 1 
     | 
    
         
            +
            3.2.8
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -18,10 +18,31 @@ gem install ckmeans 
     | 
|
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
            ## Usage
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
      
 21 
     | 
    
         
            +
            ### Fixed Cluster Count
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            ```rb
         
     | 
| 
      
 24 
     | 
    
         
            +
            # Fixed cluster count
         
     | 
| 
      
 25 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin).clusters
         
     | 
| 
      
 26 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin).clusters
         
     | 
| 
      
 27 
     | 
    
         
            +
            ```
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            ### Estimate optimal cluster count within kmin and kmax
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
            ```rb
         
     | 
| 
      
 32 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 33 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin, kmax).clusters
         
     | 
| 
      
 34 
     | 
    
         
            +
            ```
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
            ### Fast & Stable Estimation of K
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            For big collections without many duplicates, use regular estimation.
         
     | 
| 
      
 39 
     | 
    
         
            +
            For relatively small sets or sets with many duplicates, use Gaussian Mixture Model (GMM)-based estimation.
         
     | 
| 
      
 40 
     | 
    
         
            +
            It is slower but more resilient to data patterns such as large numbers of duplicates or clusters with different
         
     | 
| 
      
 41 
     | 
    
         
            +
            numbers of elements.
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
       21 
43 
     | 
    
         
             
            ```rb
         
     | 
| 
       22 
     | 
    
         
            -
            Ckmeans::Clusterer(data, kmin).clusters 
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
            Ckmeans::Clusterer(data, kmin, kmax, :sensitive).clusters # Adjust Bayesian Information Criteria favoring more smaller clusters
         
     | 
| 
      
 44 
     | 
    
         
            +
            Ckmeans::Clusterer(data, kmin, kmax, :gmm).clusters
         
     | 
| 
      
 45 
     | 
    
         
            +
            Ckmedian::Clusterer(data, kmin, kmax, :gmm).clusters
         
     | 
| 
       25 
46 
     | 
    
         
             
            ```
         
     | 
| 
       26 
47 
     | 
    
         | 
| 
       27 
48 
     | 
    
         
             
            ## License
         
     | 
    
        data/ext/ckmeans/extensions.c
    CHANGED
    
    | 
         @@ -33,17 +33,19 @@ typedef struct VectorI { 
     | 
|
| 
       33 
33 
     | 
    
         
             
                uint32_t *values;
         
     | 
| 
       34 
34 
     | 
    
         
             
            } VectorI;
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
      
 36 
     | 
    
         
            +
            typedef LDouble (FnDissim)(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
       36 
38 
     | 
    
         
             
            typedef struct State {
         
     | 
| 
       37 
39 
     | 
    
         
             
                uint32_t xcount;
         
     | 
| 
       38 
40 
     | 
    
         
             
                uint32_t kmin;
         
     | 
| 
       39 
41 
     | 
    
         
             
                uint32_t kmax;
         
     | 
| 
       40 
     | 
    
         
            -
                bool     apply_deviation;
         
     | 
| 
       41 
42 
     | 
    
         
             
                Arena   *arena;
         
     | 
| 
       42 
43 
     | 
    
         
             
                VectorF *xsorted;
         
     | 
| 
       43 
44 
     | 
    
         
             
                MatrixF *cost;
         
     | 
| 
       44 
45 
     | 
    
         
             
                MatrixI *splits;
         
     | 
| 
       45 
46 
     | 
    
         
             
                VectorF *xsum;
         
     | 
| 
       46 
47 
     | 
    
         
             
                VectorF *xsumsq;
         
     | 
| 
      
 48 
     | 
    
         
            +
                FnDissim *dissim;
         
     | 
| 
       47 
49 
     | 
    
         
             
            } State;
         
     | 
| 
       48 
50 
     | 
    
         | 
| 
       49 
51 
     | 
    
         
             
            typedef struct RowParams {
         
     | 
| 
         @@ -59,6 +61,8 @@ typedef struct { 
     | 
|
| 
       59 
61 
     | 
    
         
             
            } SegmentStats;
         
     | 
| 
       60 
62 
     | 
    
         | 
| 
       61 
63 
     | 
    
         
             
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 64 
     | 
    
         
            +
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 65 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim*);
         
     | 
| 
       62 
66 
     | 
    
         | 
| 
       63 
67 
     | 
    
         
             
            Arena *arena_create(size_t);
         
     | 
| 
       64 
68 
     | 
    
         
             
            void  *arena_alloc(Arena*, size_t);
         
     | 
| 
         @@ -85,7 +89,8 @@ uint32_t vector_get_i(VectorI*, uint32_t offset); 
     | 
|
| 
       85 
89 
     | 
    
         
             
            void     vector_downsize_i(VectorI*, uint32_t);
         
     | 
| 
       86 
90 
     | 
    
         
             
            void     vector_inspect_i(VectorI*);
         
     | 
| 
       87 
91 
     | 
    
         | 
| 
       88 
     | 
    
         
            -
            LDouble       
     | 
| 
      
 92 
     | 
    
         
            +
            LDouble      dissimilarity_l2(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 93 
     | 
    
         
            +
            LDouble      dissimilarity_l1(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
       89 
94 
     | 
    
         
             
            void         fill_row(State, uint32_t, uint32_t, uint32_t);
         
     | 
| 
       90 
95 
     | 
    
         
             
            void         smawk(State, RowParams, VectorI*);
         
     | 
| 
       91 
96 
     | 
    
         
             
            void         find_min_from_candidates(State, RowParams, VectorI*);
         
     | 
| 
         @@ -93,13 +98,17 @@ VectorI      *prune_candidates(State, RowParams, VectorI*); 
     | 
|
| 
       93 
98 
     | 
    
         
             
            void         fill_even_positions(State, RowParams, VectorI*);
         
     | 
| 
       94 
99 
     | 
    
         
             
            SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
         
     | 
| 
       95 
100 
     | 
    
         
             
            VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
         
     | 
| 
       96 
     | 
    
         
            -
            uint32_t      
     | 
| 
      
 101 
     | 
    
         
            +
            uint32_t     find_koptimal_fast(State);
         
     | 
| 
      
 102 
     | 
    
         
            +
            uint32_t     find_koptimal_gmm(State);
         
     | 
| 
       97 
103 
     | 
    
         | 
| 
       98 
104 
     | 
    
         
             
            void Init_extensions(void) {
         
     | 
| 
       99 
     | 
    
         
            -
                VALUE ckmeans_module 
     | 
| 
       100 
     | 
    
         
            -
                VALUE  
     | 
| 
      
 105 
     | 
    
         
            +
                VALUE ckmeans_module     = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
         
     | 
| 
      
 106 
     | 
    
         
            +
                VALUE ckmedian_module    = rb_const_get(rb_cObject, rb_intern("Ckmedian"));
         
     | 
| 
      
 107 
     | 
    
         
            +
                VALUE ckmeans_clusterer  = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
         
     | 
| 
      
 108 
     | 
    
         
            +
                VALUE ckmedian_clusterer = rb_const_get(ckmedian_module, rb_intern("Clusterer"));
         
     | 
| 
       101 
109 
     | 
    
         | 
| 
       102 
     | 
    
         
            -
                rb_define_private_method( 
     | 
| 
      
 110 
     | 
    
         
            +
                rb_define_private_method(ckmeans_clusterer, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
         
     | 
| 
      
 111 
     | 
    
         
            +
                rb_define_private_method(ckmedian_clusterer, "sorted_group_sizes", rb_ckmedian_sorted_group_sizes, 0);
         
     | 
| 
       103 
112 
     | 
    
         
             
            }
         
     | 
| 
       104 
113 
     | 
    
         | 
| 
       105 
114 
     | 
    
         
             
            # define ARENA_MIN_CAPACITY 100
         
     | 
| 
         @@ -108,13 +117,23 @@ void Init_extensions(void) { 
     | 
|
| 
       108 
117 
     | 
    
         | 
| 
       109 
118 
     | 
    
         
             
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
         
     | 
| 
       110 
119 
     | 
    
         
             
            {
         
     | 
| 
       111 
     | 
    
         
            -
                 
     | 
| 
       112 
     | 
    
         
            -
             
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                 
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
      
 120 
     | 
    
         
            +
                return rb_sorted_group_sizes(self, dissimilarity_l2);
         
     | 
| 
      
 121 
     | 
    
         
            +
            }
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
            VALUE rb_ckmedian_sorted_group_sizes(VALUE self)
         
     | 
| 
      
 124 
     | 
    
         
            +
            {
         
     | 
| 
      
 125 
     | 
    
         
            +
                return rb_sorted_group_sizes(self, dissimilarity_l1);
         
     | 
| 
      
 126 
     | 
    
         
            +
            }
         
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
            VALUE rb_sorted_group_sizes(VALUE self, FnDissim *criteria)
         
     | 
| 
      
 129 
     | 
    
         
            +
            {
         
     | 
| 
      
 130 
     | 
    
         
            +
                uint32_t xcount  = NUM2UINT(rb_iv_get(self, "@xcount"));
         
     | 
| 
      
 131 
     | 
    
         
            +
                uint32_t kmin    = NUM2UINT(rb_iv_get(self, "@kmin"));
         
     | 
| 
      
 132 
     | 
    
         
            +
                uint32_t kmax    = NUM2UINT(rb_iv_get(self, "@kmax"));
         
     | 
| 
      
 133 
     | 
    
         
            +
                bool use_gmm     = RTEST(rb_iv_get(self, "@use_gmm"));
         
     | 
| 
      
 134 
     | 
    
         
            +
                VALUE rb_xsorted = rb_iv_get(self, "@xsorted");
         
     | 
| 
      
 135 
     | 
    
         
            +
                size_t capacity  = sizeof(LDouble) * (xcount + 2) * (kmax + 2) * ALLOCATION_FACTOR + ARENA_MIN_CAPACITY;
         
     | 
| 
      
 136 
     | 
    
         
            +
                Arena *arena     = arena_create(capacity);
         
     | 
| 
       118 
137 
     | 
    
         | 
| 
       119 
138 
     | 
    
         
             
                if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
         
     | 
| 
       120 
139 
     | 
    
         | 
| 
         @@ -130,16 +149,16 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       130 
149 
     | 
    
         
             
                }
         
     | 
| 
       131 
150 
     | 
    
         | 
| 
       132 
151 
     | 
    
         
             
                State state = {
         
     | 
| 
       133 
     | 
    
         
            -
                    .arena 
     | 
| 
       134 
     | 
    
         
            -
                    .xcount 
     | 
| 
       135 
     | 
    
         
            -
                    .kmin 
     | 
| 
       136 
     | 
    
         
            -
                    .kmax 
     | 
| 
       137 
     | 
    
         
            -
                    . 
     | 
| 
       138 
     | 
    
         
            -
                    . 
     | 
| 
       139 
     | 
    
         
            -
                    . 
     | 
| 
       140 
     | 
    
         
            -
                    . 
     | 
| 
       141 
     | 
    
         
            -
                    . 
     | 
| 
       142 
     | 
    
         
            -
                    . 
     | 
| 
      
 152 
     | 
    
         
            +
                    .arena   = arena,
         
     | 
| 
      
 153 
     | 
    
         
            +
                    .xcount  = xcount,
         
     | 
| 
      
 154 
     | 
    
         
            +
                    .kmin    = kmin,
         
     | 
| 
      
 155 
     | 
    
         
            +
                    .kmax    = kmax,
         
     | 
| 
      
 156 
     | 
    
         
            +
                    .xsorted = xsorted,
         
     | 
| 
      
 157 
     | 
    
         
            +
                    .cost    = cost,
         
     | 
| 
      
 158 
     | 
    
         
            +
                    .splits  = splits,
         
     | 
| 
      
 159 
     | 
    
         
            +
                    .xsum    = xsum,
         
     | 
| 
      
 160 
     | 
    
         
            +
                    .xsumsq  = xsumsq,
         
     | 
| 
      
 161 
     | 
    
         
            +
                    .dissim  = criteria
         
     | 
| 
       143 
162 
     | 
    
         
             
                };
         
     | 
| 
       144 
163 
     | 
    
         | 
| 
       145 
164 
     | 
    
         | 
| 
         @@ -157,7 +176,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       157 
176 
     | 
    
         | 
| 
       158 
177 
     | 
    
         
             
                    vector_set_f(xsum, i, xsum_prev + diff);
         
     | 
| 
       159 
178 
     | 
    
         
             
                    vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
         
     | 
| 
       160 
     | 
    
         
            -
                    matrix_set_f(cost, 0, i,  
     | 
| 
      
 179 
     | 
    
         
            +
                    matrix_set_f(cost, 0, i, criteria(0, i, xsum, xsumsq));
         
     | 
| 
       161 
180 
     | 
    
         
             
                    matrix_set_i(splits, 0, i, 0);
         
     | 
| 
       162 
181 
     | 
    
         
             
                }
         
     | 
| 
       163 
182 
     | 
    
         | 
| 
         @@ -166,7 +185,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       166 
185 
     | 
    
         
             
                    fill_row(state, q, imin, xcount - 1);
         
     | 
| 
       167 
186 
     | 
    
         
             
                }
         
     | 
| 
       168 
187 
     | 
    
         | 
| 
       169 
     | 
    
         
            -
                uint32_t koptimal =  
     | 
| 
      
 188 
     | 
    
         
            +
                uint32_t koptimal = use_gmm ? find_koptimal_gmm(state) : find_koptimal_fast(state);
         
     | 
| 
       170 
189 
     | 
    
         | 
| 
       171 
190 
     | 
    
         
             
                VectorI *sizes = vector_create_i(arena, koptimal);
         
     | 
| 
       172 
191 
     | 
    
         
             
                backtrack_sizes(state, sizes, koptimal);
         
     | 
| 
         @@ -188,7 +207,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
|
| 
       188 
207 
     | 
    
         
             
                return response;
         
     | 
| 
       189 
208 
     | 
    
         
             
            }
         
     | 
| 
       190 
209 
     | 
    
         | 
| 
       191 
     | 
    
         
            -
            uint32_t  
     | 
| 
      
 210 
     | 
    
         
            +
            uint32_t find_koptimal_fast(State state)
         
     | 
| 
       192 
211 
     | 
    
         
             
            {
         
     | 
| 
       193 
212 
     | 
    
         
             
                uint32_t kmin       = state.kmin;
         
     | 
| 
       194 
213 
     | 
    
         
             
                uint32_t kmax       = state.kmax;
         
     | 
| 
         @@ -235,8 +254,7 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       235 
254 
     | 
    
         
             
                                loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
         
     | 
| 
       236 
255 
     | 
    
         
             
                            }
         
     | 
| 
       237 
256 
     | 
    
         
             
                            loglikelihood += npoints * (
         
     | 
| 
       238 
     | 
    
         
            -
                                 
     | 
| 
       239 
     | 
    
         
            -
                                (0.5 * log(PIx2 * variance))
         
     | 
| 
      
 257 
     | 
    
         
            +
                                log(npoints / (LDouble) xcount) - (0.5 * log(PIx2 * variance))
         
     | 
| 
       240 
258 
     | 
    
         
             
                            );
         
     | 
| 
       241 
259 
     | 
    
         
             
                        } else {
         
     | 
| 
       242 
260 
     | 
    
         
             
                            loglikelihood += npoints * log(1.0 / bin_width / xcount);
         
     | 
| 
         @@ -259,6 +277,101 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       259 
277 
     | 
    
         
             
                return kopt;
         
     | 
| 
       260 
278 
     | 
    
         
             
            }
         
     | 
| 
       261 
279 
     | 
    
         | 
| 
      
 280 
     | 
    
         
            +
            uint32_t find_koptimal_gmm(State state)
         
     | 
| 
      
 281 
     | 
    
         
            +
            {
         
     | 
| 
      
 282 
     | 
    
         
            +
                uint32_t kmin = state.kmin;
         
     | 
| 
      
 283 
     | 
    
         
            +
                uint32_t kmax = state.kmax;
         
     | 
| 
      
 284 
     | 
    
         
            +
                uint32_t xcount = state.xcount;
         
     | 
| 
      
 285 
     | 
    
         
            +
             
     | 
| 
      
 286 
     | 
    
         
            +
                if (kmin > kmax || xcount < 2) {
         
     | 
| 
      
 287 
     | 
    
         
            +
                    return (kmin < kmax) ? kmin : kmax;
         
     | 
| 
      
 288 
     | 
    
         
            +
                }
         
     | 
| 
      
 289 
     | 
    
         
            +
             
     | 
| 
      
 290 
     | 
    
         
            +
                Arena *arena       = state.arena;
         
     | 
| 
      
 291 
     | 
    
         
            +
                VectorF *xsorted   = state.xsorted;
         
     | 
| 
      
 292 
     | 
    
         
            +
                uint32_t kopt      = kmin;
         
     | 
| 
      
 293 
     | 
    
         
            +
                LDouble max_bic    = 0.0;
         
     | 
| 
      
 294 
     | 
    
         
            +
                LDouble log_xcount = log((LDouble) xcount);
         
     | 
| 
      
 295 
     | 
    
         
            +
                VectorF *lambda    = vector_create_f(arena, kmax);
         
     | 
| 
      
 296 
     | 
    
         
            +
                VectorF *mu        = vector_create_f(arena, kmax);
         
     | 
| 
      
 297 
     | 
    
         
            +
                VectorF *sigma2    = vector_create_f(arena, kmax);
         
     | 
| 
      
 298 
     | 
    
         
            +
                VectorF *coeff     = vector_create_f(arena, kmax);
         
     | 
| 
      
 299 
     | 
    
         
            +
                VectorI *sizes     = vector_create_i(arena, kmax);
         
     | 
| 
      
 300 
     | 
    
         
            +
             
     | 
| 
      
 301 
     | 
    
         
            +
                for (uint32_t kouter = kmin; kouter <= kmax; ++kouter)
         
     | 
| 
      
 302 
     | 
    
         
            +
                {
         
     | 
| 
      
 303 
     | 
    
         
            +
                    uint32_t ileft = 0;
         
     | 
| 
      
 304 
     | 
    
         
            +
                    uint32_t iright;
         
     | 
| 
      
 305 
     | 
    
         
            +
             
     | 
| 
      
 306 
     | 
    
         
            +
                    backtrack_sizes(state, sizes, kouter);
         
     | 
| 
      
 307 
     | 
    
         
            +
             
     | 
| 
      
 308 
     | 
    
         
            +
                    for (uint32_t k = 0; k < kouter; ++k)
         
     | 
| 
      
 309 
     | 
    
         
            +
                    {
         
     | 
| 
      
 310 
     | 
    
         
            +
                        uint32_t size = vector_get_i(sizes, k);
         
     | 
| 
      
 311 
     | 
    
         
            +
                        vector_set_f(lambda, k, size / (LDouble) xcount);
         
     | 
| 
      
 312 
     | 
    
         
            +
                        iright = ileft + size - 1;
         
     | 
| 
      
 313 
     | 
    
         
            +
                        SegmentStats stats = shifted_data_variance(xsorted, ileft, iright);
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
      
 315 
     | 
    
         
            +
                        vector_set_f(mu, k, stats.mean);
         
     | 
| 
      
 316 
     | 
    
         
            +
                        vector_set_f(sigma2, k, stats.variance);
         
     | 
| 
      
 317 
     | 
    
         
            +
             
     | 
| 
      
 318 
     | 
    
         
            +
                        if (stats.variance == 0 || size == 1) {
         
     | 
| 
      
 319 
     | 
    
         
            +
                            LDouble dmin;
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
                            if (ileft > 0 && iright < xcount - 1) {
         
     | 
| 
      
 322 
     | 
    
         
            +
                                LDouble left_diff = vector_get_diff_f(xsorted, ileft, ileft - 1);
         
     | 
| 
      
 323 
     | 
    
         
            +
                                LDouble right_diff = vector_get_diff_f(xsorted, iright + 1, iright);
         
     | 
| 
      
 324 
     | 
    
         
            +
             
     | 
| 
      
 325 
     | 
    
         
            +
                                dmin = (left_diff < right_diff) ? left_diff : right_diff;
         
     | 
| 
      
 326 
     | 
    
         
            +
                            } else if (ileft > 0) {
         
     | 
| 
      
 327 
     | 
    
         
            +
                                dmin = vector_get_diff_f(xsorted, ileft, ileft - 1);
         
     | 
| 
      
 328 
     | 
    
         
            +
                            } else {
         
     | 
| 
      
 329 
     | 
    
         
            +
                                dmin = vector_get_diff_f(xsorted, iright + 1, iright);
         
     | 
| 
      
 330 
     | 
    
         
            +
                            }
         
     | 
| 
      
 331 
     | 
    
         
            +
             
     | 
| 
      
 332 
     | 
    
         
            +
                            if (stats.variance == 0) vector_set_f(sigma2, k, dmin * dmin / 4.0 / 9.0);
         
     | 
| 
      
 333 
     | 
    
         
            +
                            if (size == 1)  vector_set_f(sigma2, k, dmin * dmin);
         
     | 
| 
      
 334 
     | 
    
         
            +
                        }
         
     | 
| 
      
 335 
     | 
    
         
            +
             
     | 
| 
      
 336 
     | 
    
         
            +
                        LDouble lambda_k = vector_get_f(lambda, k);
         
     | 
| 
      
 337 
     | 
    
         
            +
                        LDouble sigma2_k = vector_get_f(sigma2, k);
         
     | 
| 
      
 338 
     | 
    
         
            +
                        vector_set_f(coeff, k, lambda_k / sqrt(PIx2 * sigma2_k));
         
     | 
| 
      
 339 
     | 
    
         
            +
                        ileft = iright + 1;
         
     | 
| 
      
 340 
     | 
    
         
            +
                    }
         
     | 
| 
      
 341 
     | 
    
         
            +
             
     | 
| 
      
 342 
     | 
    
         
            +
                    LDouble loglikelihood = 0.0;
         
     | 
| 
      
 343 
     | 
    
         
            +
             
     | 
| 
      
 344 
     | 
    
         
            +
                    for (uint32_t i = 0; i < xcount; ++i)
         
     | 
| 
      
 345 
     | 
    
         
            +
                    {
         
     | 
| 
      
 346 
     | 
    
         
            +
                        LDouble L  = 0.0;
         
     | 
| 
      
 347 
     | 
    
         
            +
                        LDouble xi = vector_get_f(xsorted, i);
         
     | 
| 
      
 348 
     | 
    
         
            +
             
     | 
| 
      
 349 
     | 
    
         
            +
                        for (uint32_t k = 0; k < kouter; ++k)
         
     | 
| 
      
 350 
     | 
    
         
            +
                        {
         
     | 
| 
      
 351 
     | 
    
         
            +
                            LDouble coeff_k   = vector_get_f(coeff, k);
         
     | 
| 
      
 352 
     | 
    
         
            +
                            LDouble mu_k      = vector_get_f(mu, k);
         
     | 
| 
      
 353 
     | 
    
         
            +
                            LDouble sigma2_k  = vector_get_f(sigma2, k);
         
     | 
| 
      
 354 
     | 
    
         
            +
                            LDouble x_mu_diff = xi - mu_k;
         
     | 
| 
      
 355 
     | 
    
         
            +
                            L                += coeff_k * exp(- x_mu_diff * x_mu_diff / (2.0 * sigma2_k));
         
     | 
| 
      
 356 
     | 
    
         
            +
                        }
         
     | 
| 
      
 357 
     | 
    
         
            +
                        loglikelihood += log(L);
         
     | 
| 
      
 358 
     | 
    
         
            +
                    }
         
     | 
| 
      
 359 
     | 
    
         
            +
             
     | 
| 
      
 360 
     | 
    
         
            +
                    LDouble bic = 2 * loglikelihood - (3 * kouter - 1) * log_xcount;
         
     | 
| 
      
 361 
     | 
    
         
            +
             
     | 
| 
      
 362 
     | 
    
         
            +
                    if (kouter == kmin) {
         
     | 
| 
      
 363 
     | 
    
         
            +
                        max_bic = bic;
         
     | 
| 
      
 364 
     | 
    
         
            +
                        kopt = kmin;
         
     | 
| 
      
 365 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 366 
     | 
    
         
            +
                        if (bic > max_bic) {
         
     | 
| 
      
 367 
     | 
    
         
            +
                            max_bic = bic;
         
     | 
| 
      
 368 
     | 
    
         
            +
                            kopt = kouter;
         
     | 
| 
      
 369 
     | 
    
         
            +
                        }
         
     | 
| 
      
 370 
     | 
    
         
            +
                    }
         
     | 
| 
      
 371 
     | 
    
         
            +
                }
         
     | 
| 
      
 372 
     | 
    
         
            +
                return kopt;
         
     | 
| 
      
 373 
     | 
    
         
            +
            }
         
     | 
| 
      
 374 
     | 
    
         
            +
             
     | 
| 
       262 
375 
     | 
    
         
             
            VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
         
     | 
| 
       263 
376 
     | 
    
         
             
            {
         
     | 
| 
       264 
377 
     | 
    
         
             
                MatrixI *splits = state.splits;
         
     | 
| 
         @@ -336,7 +449,7 @@ void smawk(State state, RowParams rparams, VectorI *split_candidates) 
     | 
|
| 
       336 
449 
     | 
    
         
             
                }
         
     | 
| 
       337 
450 
     | 
    
         
             
            }
         
     | 
| 
       338 
451 
     | 
    
         | 
| 
       339 
     | 
    
         
            -
            void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 452 
     | 
    
         
            +
            inline void fill_even_positions(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       340 
453 
     | 
    
         
             
            {
         
     | 
| 
       341 
454 
     | 
    
         
             
                uint32_t row     = rparams.row;
         
     | 
| 
       342 
455 
     | 
    
         
             
                uint32_t imin    = rparams.imin;
         
     | 
| 
         @@ -345,9 +458,10 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       345 
458 
     | 
    
         
             
                uint32_t n       = split_candidates->size;
         
     | 
| 
       346 
459 
     | 
    
         
             
                uint32_t istepx2 = istep * 2;
         
     | 
| 
       347 
460 
     | 
    
         
             
                uint32_t jl      = vector_get_i(split_candidates, 0);
         
     | 
| 
       348 
     | 
    
         
            -
                VectorF *xsum    = state.xsum;
         
     | 
| 
       349 
     | 
    
         
            -
                VectorF *xsumsq  = state.xsumsq;
         
     | 
| 
       350 
     | 
    
         
            -
                MatrixI *splits  = state.splits;
         
     | 
| 
      
 461 
     | 
    
         
            +
                VectorF *const xsum    = state.xsum;
         
     | 
| 
      
 462 
     | 
    
         
            +
                VectorF *const xsumsq  = state.xsumsq;
         
     | 
| 
      
 463 
     | 
    
         
            +
                MatrixI *const splits  = state.splits;
         
     | 
| 
      
 464 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       351 
465 
     | 
    
         | 
| 
       352 
466 
     | 
    
         
             
                for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         
     | 
| 
       353 
467 
     | 
    
         
             
                    while (vector_get_i(split_candidates, r) < jl) r++;
         
     | 
| 
         @@ -356,7 +470,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       356 
470 
     | 
    
         
             
                    uint32_t cost_base_row = row - 1;
         
     | 
| 
       357 
471 
     | 
    
         
             
                    uint32_t cost_base_col = rcandidate - 1;
         
     | 
| 
       358 
472 
     | 
    
         
             
                    LDouble cost           =
         
     | 
| 
       359 
     | 
    
         
            -
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) +  
     | 
| 
      
 473 
     | 
    
         
            +
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissim(rcandidate, i, xsum, xsumsq);
         
     | 
| 
       360 
474 
     | 
    
         | 
| 
       361 
475 
     | 
    
         
             
                    matrix_set_f(state.cost, row, i, cost);
         
     | 
| 
       362 
476 
     | 
    
         
             
                    matrix_set_i(state.splits, row, i, rcandidate);
         
     | 
| 
         @@ -367,7 +481,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       367 
481 
     | 
    
         
             
                        : vector_get_i(split_candidates, n - 1);
         
     | 
| 
       368 
482 
     | 
    
         | 
| 
       369 
483 
     | 
    
         
             
                    uint32_t jmax  = jh < i ? jh : i;
         
     | 
| 
       370 
     | 
    
         
            -
                    LDouble sjimin =  
     | 
| 
      
 484 
     | 
    
         
            +
                    LDouble sjimin = dissim(jmax, i, xsum, xsumsq);
         
     | 
| 
       371 
485 
     | 
    
         | 
| 
       372 
486 
     | 
    
         
             
                    for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
         
     | 
| 
       373 
487 
     | 
    
         
             
                        uint32_t jabs = vector_get_i(split_candidates, r);
         
     | 
| 
         @@ -376,7 +490,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       376 
490 
     | 
    
         
             
                        if (jabs < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       377 
491 
     | 
    
         | 
| 
       378 
492 
     | 
    
         
             
                        LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
         
     | 
| 
       379 
     | 
    
         
            -
                        LDouble sj        = cost_base +  
     | 
| 
      
 493 
     | 
    
         
            +
                        LDouble sj        = cost_base + dissim(jabs, i, xsum, xsumsq);
         
     | 
| 
       380 
494 
     | 
    
         
             
                        LDouble cost_prev = matrix_get_f(state.cost, row, i);
         
     | 
| 
       381 
495 
     | 
    
         | 
| 
       382 
496 
     | 
    
         
             
                        if (sj <= cost_prev) {
         
     | 
| 
         @@ -392,14 +506,15 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       392 
506 
     | 
    
         
             
                }
         
     | 
| 
       393 
507 
     | 
    
         
             
            }
         
     | 
| 
       394 
508 
     | 
    
         | 
| 
       395 
     | 
    
         
            -
            void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 509 
     | 
    
         
            +
            inline void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       396 
510 
     | 
    
         
             
            {
         
     | 
| 
       397 
     | 
    
         
            -
                const uint32_t row 
     | 
| 
       398 
     | 
    
         
            -
                const uint32_t imin 
     | 
| 
       399 
     | 
    
         
            -
                const uint32_t imax 
     | 
| 
       400 
     | 
    
         
            -
                const uint32_t istep 
     | 
| 
       401 
     | 
    
         
            -
                MatrixF *const cost 
     | 
| 
       402 
     | 
    
         
            -
                MatrixI *const splits 
     | 
| 
      
 511 
     | 
    
         
            +
                const uint32_t row     = rparams.row;
         
     | 
| 
      
 512 
     | 
    
         
            +
                const uint32_t imin    = rparams.imin;
         
     | 
| 
      
 513 
     | 
    
         
            +
                const uint32_t imax    = rparams.imax;
         
     | 
| 
      
 514 
     | 
    
         
            +
                const uint32_t istep   = rparams.istep;
         
     | 
| 
      
 515 
     | 
    
         
            +
                MatrixF *const cost    = state.cost;
         
     | 
| 
      
 516 
     | 
    
         
            +
                MatrixI *const splits  = state.splits;
         
     | 
| 
      
 517 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       403 
518 
     | 
    
         | 
| 
       404 
519 
     | 
    
         
             
                uint32_t optimal_split_idx_prev = 0;
         
     | 
| 
       405 
520 
     | 
    
         | 
| 
         @@ -408,7 +523,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       408 
523 
     | 
    
         
             
                    const uint32_t optimal_split_idx = optimal_split_idx_prev;
         
     | 
| 
       409 
524 
     | 
    
         
             
                    const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         
     | 
| 
       410 
525 
     | 
    
         
             
                    const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
         
     | 
| 
       411 
     | 
    
         
            -
                    const LDouble added_cost         =  
     | 
| 
      
 526 
     | 
    
         
            +
                    const LDouble added_cost         = dissim(optimal_split, i, state.xsum, state.xsumsq);
         
     | 
| 
       412 
527 
     | 
    
         | 
| 
       413 
528 
     | 
    
         
             
                    matrix_set_f(cost, row, i, cost_prev + added_cost);
         
     | 
| 
       414 
529 
     | 
    
         
             
                    matrix_set_i(splits, row, i, optimal_split);
         
     | 
| 
         @@ -421,7 +536,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       421 
536 
     | 
    
         
             
                        if (split > i) break;
         
     | 
| 
       422 
537 
     | 
    
         | 
| 
       423 
538 
     | 
    
         
             
                        LDouble split_cost =
         
     | 
| 
       424 
     | 
    
         
            -
                            matrix_get_f(cost, row - 1, split - 1) +  
     | 
| 
      
 539 
     | 
    
         
            +
                            matrix_get_f(cost, row - 1, split - 1) + dissim(split, i, state.xsum, state.xsumsq);
         
     | 
| 
       425 
540 
     | 
    
         | 
| 
       426 
541 
     | 
    
         
             
                        if (split_cost > matrix_get_f(cost, row, i)) continue;
         
     | 
| 
       427 
542 
     | 
    
         | 
| 
         @@ -432,7 +547,7 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       432 
547 
     | 
    
         
             
                }
         
     | 
| 
       433 
548 
     | 
    
         
             
            }
         
     | 
| 
       434 
549 
     | 
    
         | 
| 
       435 
     | 
    
         
            -
            VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 550 
     | 
    
         
            +
            inline VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       436 
551 
     | 
    
         
             
            {
         
     | 
| 
       437 
552 
     | 
    
         
             
                uint32_t imin  = rparams.imin;
         
     | 
| 
       438 
553 
     | 
    
         
             
                uint32_t row   = rparams.row;
         
     | 
| 
         @@ -445,6 +560,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       445 
560 
     | 
    
         
             
                uint32_t left   = 0;
         
     | 
| 
       446 
561 
     | 
    
         
             
                uint32_t right  = 0;
         
     | 
| 
       447 
562 
     | 
    
         
             
                VectorI *pruned = vector_dup_i(split_candidates, state.arena);
         
     | 
| 
      
 563 
     | 
    
         
            +
                FnDissim *const dissim = state.dissim;
         
     | 
| 
       448 
564 
     | 
    
         | 
| 
       449 
565 
     | 
    
         
             
                while (m > n)
         
     | 
| 
       450 
566 
     | 
    
         
             
                {
         
     | 
| 
         @@ -452,9 +568,9 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       452 
568 
     | 
    
         
             
                    uint32_t j     = vector_get_i(pruned, right);
         
     | 
| 
       453 
569 
     | 
    
         
             
                    uint32_t jnext = vector_get_i(pruned, right + 1);
         
     | 
| 
       454 
570 
     | 
    
         
             
                    LDouble sl     =
         
     | 
| 
       455 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, j - 1) +  
     | 
| 
      
 571 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, j - 1) + dissim(j, i, state.xsum, state.xsumsq);
         
     | 
| 
       456 
572 
     | 
    
         
             
                    LDouble snext  =
         
     | 
| 
       457 
     | 
    
         
            -
                        matrix_get_f(state.cost, row - 1, jnext - 1) +  
     | 
| 
      
 573 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissim(jnext, i, state.xsum, state.xsumsq);
         
     | 
| 
       458 
574 
     | 
    
         | 
| 
       459 
575 
     | 
    
         
             
                    if ((sl < snext) && (left < n - 1)) {
         
     | 
| 
       460 
576 
     | 
    
         
             
                        vector_set_i(pruned, left, j);
         
     | 
| 
         @@ -484,7 +600,8 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       484 
600 
     | 
    
         
             
                return pruned;
         
     | 
| 
       485 
601 
     | 
    
         
             
            }
         
     | 
| 
       486 
602 
     | 
    
         | 
| 
       487 
     | 
    
         
            -
             
     | 
| 
      
 603 
     | 
    
         
            +
            /* L2 aka Euclidean aka Mean dissimilarity criteria */
         
     | 
| 
      
 604 
     | 
    
         
            +
            inline LDouble dissimilarity_l2(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
         
     | 
| 
       488 
605 
     | 
    
         
             
                LDouble sji = 0.0;
         
     | 
| 
       489 
606 
     | 
    
         | 
| 
       490 
607 
     | 
    
         
             
                if (j >= i) return sji;
         
     | 
| 
         @@ -501,6 +618,48 @@ inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, Vec 
     | 
|
| 
       501 
618 
     | 
    
         
             
                return (sji > 0) ? sji : 0.0;
         
     | 
| 
       502 
619 
     | 
    
         
             
            }
         
     | 
| 
       503 
620 
     | 
    
         | 
| 
      
 621 
     | 
    
         
            +
            /* L1 aka Manhattan aka Median dissimilarity criteria */
         
     | 
| 
      
 622 
     | 
    
         
            +
            inline LDouble dissimilarity_l1(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict _xsumsq)
         
     | 
| 
      
 623 
     | 
    
         
            +
            {
         
     | 
| 
      
 624 
     | 
    
         
            +
                LDouble sji = 0.0;
         
     | 
| 
      
 625 
     | 
    
         
            +
             
     | 
| 
      
 626 
     | 
    
         
            +
                if (j >= i) return sji;
         
     | 
| 
      
 627 
     | 
    
         
            +
             
     | 
| 
      
 628 
     | 
    
         
            +
                if (j > 0) {
         
     | 
| 
      
 629 
     | 
    
         
            +
                    uint32_t median_idx = (i + j) >> 1;
         
     | 
| 
      
 630 
     | 
    
         
            +
             
     | 
| 
      
 631 
     | 
    
         
            +
                    if (((i - j + 1) % 2) == 1) {
         
     | 
| 
      
 632 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 633 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 634 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 635 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 636 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 637 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 638 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 639 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 640 
     | 
    
         
            +
                            + vector_get_f(xsum, j - 1)
         
     | 
| 
      
 641 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 642 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 643 
     | 
    
         
            +
                    }
         
     | 
| 
      
 644 
     | 
    
         
            +
                } else { // j == 0
         
     | 
| 
      
 645 
     | 
    
         
            +
                    uint32_t median_idx = i >> 1;
         
     | 
| 
      
 646 
     | 
    
         
            +
             
     | 
| 
      
 647 
     | 
    
         
            +
                    if (((i + 1) % 2) == 1) {
         
     | 
| 
      
 648 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 649 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx - 1)
         
     | 
| 
      
 650 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 651 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 652 
     | 
    
         
            +
                    } else {
         
     | 
| 
      
 653 
     | 
    
         
            +
                        sji =
         
     | 
| 
      
 654 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx)
         
     | 
| 
      
 655 
     | 
    
         
            +
                            + vector_get_f(xsum, i)
         
     | 
| 
      
 656 
     | 
    
         
            +
                            - vector_get_f(xsum, median_idx);
         
     | 
| 
      
 657 
     | 
    
         
            +
                    }
         
     | 
| 
      
 658 
     | 
    
         
            +
                }
         
     | 
| 
      
 659 
     | 
    
         
            +
             
     | 
| 
      
 660 
     | 
    
         
            +
                return (sji < 0) ? 0.0 : sji;
         
     | 
| 
      
 661 
     | 
    
         
            +
            }
         
     | 
| 
      
 662 
     | 
    
         
            +
             
     | 
| 
       504 
663 
     | 
    
         
             
            inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
         
     | 
| 
       505 
664 
     | 
    
         
             
                VectorF *v;
         
     | 
| 
       506 
665 
     | 
    
         | 
| 
         @@ -656,7 +815,7 @@ Arena *arena_create(size_t capacity) { 
     | 
|
| 
       656 
815 
     | 
    
         
             
            }
         
     | 
| 
       657 
816 
     | 
    
         | 
| 
       658 
817 
     | 
    
         
             
            void *arena_alloc(Arena *arena, size_t size) {
         
     | 
| 
       659 
     | 
    
         
            -
                size = (size +  
     | 
| 
      
 818 
     | 
    
         
            +
                size = (size + 0xf) & ~0xf;
         
     | 
| 
       660 
819 
     | 
    
         | 
| 
       661 
820 
     | 
    
         
             
                if (arena->offset + size > arena->capacity) {
         
     | 
| 
       662 
821 
     | 
    
         
             
                    rb_raise(rb_eNoMemError, "Arena Insufficient Capacity");
         
     | 
    
        data/lib/ckmeans/clusterer.rb
    CHANGED
    
    | 
         @@ -2,18 +2,18 @@ 
     | 
|
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module Ckmeans
         
     | 
| 
       4 
4 
     | 
    
         
             
              class Clusterer # rubocop:disable Style/Documentation
         
     | 
| 
       5 
     | 
    
         
            -
                def initialize(entries, kmin, kmax = kmin, kestimate = : 
     | 
| 
      
 5 
     | 
    
         
            +
                def initialize(entries, kmin, kmax = kmin, kestimate = :fast)
         
     | 
| 
       6 
6 
     | 
    
         
             
                  @xcount = entries.size
         
     | 
| 
       7 
7 
     | 
    
         | 
| 
       8 
8 
     | 
    
         
             
                  raise ArgumentError, "Minimum cluster count is bigger than element count" if kmin > @xcount
         
     | 
| 
       9 
9 
     | 
    
         
             
                  raise ArgumentError, "Maximum cluster count is bigger than element count" if kmax > @xcount
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
                  @kmin 
     | 
| 
       12 
     | 
    
         
            -
                  @unique_xcount 
     | 
| 
       13 
     | 
    
         
            -
                  @kmax 
     | 
| 
       14 
     | 
    
         
            -
                  @xsorted_original 
     | 
| 
       15 
     | 
    
         
            -
                  @xsorted 
     | 
| 
       16 
     | 
    
         
            -
                  @ 
     | 
| 
      
 11 
     | 
    
         
            +
                  @kmin             = kmin
         
     | 
| 
      
 12 
     | 
    
         
            +
                  @unique_xcount    = entries.uniq.size
         
     | 
| 
      
 13 
     | 
    
         
            +
                  @kmax             = [@unique_xcount, kmax].min
         
     | 
| 
      
 14 
     | 
    
         
            +
                  @xsorted_original = entries.sort
         
     | 
| 
      
 15 
     | 
    
         
            +
                  @xsorted          = @xsorted_original.map(&:to_f)
         
     | 
| 
      
 16 
     | 
    
         
            +
                  @use_gmm          = kestimate == :gmm
         
     | 
| 
       17 
17 
     | 
    
         
             
                end
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
                def clusters
         
     | 
| 
         @@ -28,5 +28,3 @@ module Ckmeans 
     | 
|
| 
       28 
28 
     | 
    
         
             
                end
         
     | 
| 
       29 
29 
     | 
    
         
             
              end
         
     | 
| 
       30 
30 
     | 
    
         
             
            end
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
            require "ckmeans/extensions"
         
     | 
    
        data/lib/ckmeans/version.rb
    CHANGED
    
    
    
        data/lib/ckmeans.rb
    CHANGED
    
    
| 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Ckmedian
              # Groups a list of values into consecutive runs of its sorted order.
              #
              # The constructor validates the requested cluster-count bounds against the
              # number of entries and keeps two sorted copies of the input: the original
              # objects and their Float conversions.
              #
              # NOTE(review): +sorted_group_sizes+ is not defined in this file; it is
              # presumably supplied by the native extension -- confirm.
              class Clusterer
                # entries - collection of values responding to +size+, +uniq+ and +sort+
                # kmin    - smallest cluster count to consider
                # kmax    - largest cluster count to consider (defaults to kmin); stored
                #           capped at the number of distinct entries
                #
                # Raises ArgumentError when kmin or kmax exceeds the number of entries.
                def initialize(entries, kmin, kmax = kmin)
                  @xcount = entries.size

                  raise ArgumentError, "Minimum cluster count is bigger than element count" if kmin > @xcount
                  raise ArgumentError, "Maximum cluster count is bigger than element count" if kmax > @xcount

                  @kmin = kmin
                  @unique_xcount = entries.uniq.size
                  @kmax = [@unique_xcount, kmax].min

                  sorted_entries = entries.sort
                  @xsorted_original = sorted_entries
                  @xsorted = sorted_entries.map { |entry| entry.to_f }
                end

                # Returns the clusters as an array of arrays of the original entries, in
                # sorted order. The result is memoized.
                #
                # With at most one distinct value everything lands in a single cluster.
                # Otherwise groups are peeled off the front of the sorted entries, one per
                # size reported by +sorted_group_sizes+ (this empties @xsorted_original).
                def clusters
                  @clusters ||=
                    if @unique_xcount > 1
                      sorted_group_sizes.map { |size| @xsorted_original.shift(size) }
                    else
                      [@xsorted_original]
                    end
                end
              end
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ckmeans
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version:  
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.0.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Vlad Lebedev
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire:
         
     | 
| 
       8 
9 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       9 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       10 
     | 
    
         
            -
            date: 2025- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2025-06-09 00:00:00.000000000 Z
         
     | 
| 
       11 
12 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       12 
13 
     | 
    
         
             
            description: Repeatable clustering of unidimensional data
         
     | 
| 
       13 
14 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -32,6 +33,7 @@ files: 
     | 
|
| 
       32 
33 
     | 
    
         
             
            - lib/ckmeans.rb
         
     | 
| 
       33 
34 
     | 
    
         
             
            - lib/ckmeans/clusterer.rb
         
     | 
| 
       34 
35 
     | 
    
         
             
            - lib/ckmeans/version.rb
         
     | 
| 
      
 36 
     | 
    
         
            +
            - lib/ckmedian/clusterer.rb
         
     | 
| 
       35 
37 
     | 
    
         
             
            - sig/ckmeans.rbs
         
     | 
| 
       36 
38 
     | 
    
         
             
            homepage: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       37 
39 
     | 
    
         
             
            licenses:
         
     | 
| 
         @@ -41,6 +43,7 @@ metadata: 
     | 
|
| 
       41 
43 
     | 
    
         
             
              homepage_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       42 
44 
     | 
    
         
             
              source_code_uri: https://github.com/vlebedeff/rb-ckmeans
         
     | 
| 
       43 
45 
     | 
    
         
             
              changelog_uri: https://github.com/vlebedeff/rb-ckmeans/blob/main/CHANGELOG.md
         
     | 
| 
      
 46 
     | 
    
         
            +
            post_install_message:
         
     | 
| 
       44 
47 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       45 
48 
     | 
    
         
             
            require_paths:
         
     | 
| 
       46 
49 
     | 
    
         
             
            - lib
         
     | 
| 
         @@ -55,7 +58,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       55 
58 
     | 
    
         
             
                - !ruby/object:Gem::Version
         
     | 
| 
       56 
59 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       57 
60 
     | 
    
         
             
            requirements: []
         
     | 
| 
       58 
     | 
    
         
            -
            rubygems_version: 3. 
     | 
| 
      
 61 
     | 
    
         
            +
            rubygems_version: 3.4.19
         
     | 
| 
      
 62 
     | 
    
         
            +
            signing_key:
         
     | 
| 
       59 
63 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       60 
64 
     | 
    
         
             
            summary: Ruby implementation of Ckmeans.1d.dp
         
     | 
| 
       61 
65 
     | 
    
         
             
            test_files: []
         
     |