ckmeans 1.0.2 → 1.0.3
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.dockerignore +13 -0
 - data/CHANGELOG.md +9 -0
 - data/Dockerfile +11 -0
 - data/ext/ckmeans/extensions.c +157 -151
 - data/lib/ckmeans/version.rb +1 -1
 - metadata +4 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 5ad7e8c24dd367d5e6a6dd66abc529ae92079cf99d1c781a7646c929547b0e62
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 2e338ca878eba2d250ca61fff2ea8bee44ec8387b37e12b31600edf9da2b7130
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 8c59e1e159cc9cada8afed9e016a5d8956cfe909bb7b7d82c8d155f388fdf1924a49072d37e52065fa643a539da3a192767eddb38da95b2c2524bcc7d0a39ebd
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: f2b535377d441bc1f2ee309a5466c8231b425aa0dd9b0512aa36257defa12b3b645694ae953b2b5e3b6997c50bde796e8fa1c2f8f10d4055b1cc9cb6abcf1353
         
     | 
    
        data/.dockerignore
    ADDED
    
    
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -1,5 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ## [Unreleased]
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            ## [1.0.1] - 2025-04-24
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/9
         
     | 
| 
      
 6 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/8
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ## [1.0.0] - 2025-04-22
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/6
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
       3 
12 
     | 
    
         
             
            ## [0.1.2] - 2025-03-31
         
     | 
| 
       4 
13 
     | 
    
         | 
| 
       5 
14 
     | 
    
         
             
            - https://github.com/vlebedeff/rb-ckmeans/pull/3
         
     | 
    
        data/Dockerfile
    ADDED
    
    
    
        data/ext/ckmeans/extensions.c
    CHANGED
    
    | 
         @@ -3,16 +3,18 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
            #include <string.h>
         
     | 
| 
       4 
4 
     | 
    
         
             
            #include "ruby.h"
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
      
 6 
     | 
    
         
            +
            typedef long double LDouble;
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
       6 
8 
     | 
    
         
             
            typedef struct Arena {
         
     | 
| 
       7 
     | 
    
         
            -
                 
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
      
 9 
     | 
    
         
            +
                size_t capacity;
         
     | 
| 
      
 10 
     | 
    
         
            +
                size_t offset;
         
     | 
| 
       9 
11 
     | 
    
         
             
                uint8_t  *buffer;
         
     | 
| 
       10 
12 
     | 
    
         
             
            } Arena;
         
     | 
| 
       11 
13 
     | 
    
         | 
| 
       12 
14 
     | 
    
         
             
            typedef struct MatrixF {
         
     | 
| 
       13 
15 
     | 
    
         
             
                uint32_t ncols;
         
     | 
| 
       14 
16 
     | 
    
         
             
                uint32_t nrows;
         
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
      
 17 
     | 
    
         
            +
                LDouble *values;
         
     | 
| 
       16 
18 
     | 
    
         
             
            } MatrixF;
         
     | 
| 
       17 
19 
     | 
    
         | 
| 
       18 
20 
     | 
    
         
             
            typedef struct MatrixI {
         
     | 
| 
         @@ -22,12 +24,12 @@ typedef struct MatrixI { 
     | 
|
| 
       22 
24 
     | 
    
         
             
            } MatrixI;
         
     | 
| 
       23 
25 
     | 
    
         | 
| 
       24 
26 
     | 
    
         
             
            typedef struct VectorF {
         
     | 
| 
       25 
     | 
    
         
            -
                uint32_t  
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
      
 27 
     | 
    
         
            +
                uint32_t size;
         
     | 
| 
      
 28 
     | 
    
         
            +
                LDouble *values;
         
     | 
| 
       27 
29 
     | 
    
         
             
            } VectorF;
         
     | 
| 
       28 
30 
     | 
    
         | 
| 
       29 
31 
     | 
    
         
             
            typedef struct VectorI {
         
     | 
| 
       30 
     | 
    
         
            -
                uint32_t  
     | 
| 
      
 32 
     | 
    
         
            +
                uint32_t size;
         
     | 
| 
       31 
33 
     | 
    
         
             
                uint32_t *values;
         
     | 
| 
       32 
34 
     | 
    
         
             
            } VectorI;
         
     | 
| 
       33 
35 
     | 
    
         | 
| 
         @@ -52,69 +54,71 @@ typedef struct RowParams { 
     | 
|
| 
       52 
54 
     | 
    
         
             
            } RowParams;
         
     | 
| 
       53 
55 
     | 
    
         | 
| 
       54 
56 
     | 
    
         
             
            typedef struct {
         
     | 
| 
       55 
     | 
    
         
            -
                 
     | 
| 
       56 
     | 
    
         
            -
                 
     | 
| 
      
 57 
     | 
    
         
            +
                LDouble mean;
         
     | 
| 
      
 58 
     | 
    
         
            +
                LDouble variance;
         
     | 
| 
       57 
59 
     | 
    
         
             
            } SegmentStats;
         
     | 
| 
       58 
60 
     | 
    
         | 
| 
       59 
     | 
    
         
            -
            VALUE 
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
            Arena 
     | 
| 
       62 
     | 
    
         
            -
            void 
     | 
| 
       63 
     | 
    
         
            -
            void 
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
            MatrixF 
     | 
| 
       66 
     | 
    
         
            -
            MatrixI 
     | 
| 
       67 
     | 
    
         
            -
            void 
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
            void 
     | 
| 
       70 
     | 
    
         
            -
            void 
     | 
| 
       71 
     | 
    
         
            -
            uint32_t 
     | 
| 
       72 
     | 
    
         
            -
            void 
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
            VectorF 
     | 
| 
       75 
     | 
    
         
            -
            void 
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
            void 
     | 
| 
       79 
     | 
    
         
            -
            VectorI 
     | 
| 
       80 
     | 
    
         
            -
            VectorI 
     | 
| 
       81 
     | 
    
         
            -
            void 
     | 
| 
       82 
     | 
    
         
            -
            uint32_t 
     | 
| 
       83 
     | 
    
         
            -
            void 
     | 
| 
       84 
     | 
    
         
            -
            void 
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
      
 61 
     | 
    
         
            +
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            Arena *arena_create(size_t);
         
     | 
| 
      
 64 
     | 
    
         
            +
            void  *arena_alloc(Arena*, size_t);
         
     | 
| 
      
 65 
     | 
    
         
            +
            void  arena_destroy(Arena*);
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            MatrixF  *matrix_create_f(Arena*, uint32_t, uint32_t);
         
     | 
| 
      
 68 
     | 
    
         
            +
            MatrixI  *matrix_create_i(Arena*, uint32_t, uint32_t);
         
     | 
| 
      
 69 
     | 
    
         
            +
            void     matrix_set_f(MatrixF*, uint32_t, uint32_t, LDouble value);
         
     | 
| 
      
 70 
     | 
    
         
            +
            LDouble  matrix_get_f(MatrixF*, uint32_t, uint32_t);
         
     | 
| 
      
 71 
     | 
    
         
            +
            void     matrix_inspect_f(MatrixF*);
         
     | 
| 
      
 72 
     | 
    
         
            +
            void     matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
         
     | 
| 
      
 73 
     | 
    
         
            +
            uint32_t matrix_get_i(MatrixI*, uint32_t, uint32_t);
         
     | 
| 
      
 74 
     | 
    
         
            +
            void     matrix_inspect_i(MatrixI*);
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
            VectorF  *vector_create_f(Arena*, uint32_t);
         
     | 
| 
      
 77 
     | 
    
         
            +
            void     vector_set_f(VectorF*, uint32_t offset, LDouble value);
         
     | 
| 
      
 78 
     | 
    
         
            +
            LDouble  vector_get_f(VectorF*, uint32_t offset);
         
     | 
| 
      
 79 
     | 
    
         
            +
            LDouble  vector_get_diff_f(VectorF*, uint32_t, uint32_t);
         
     | 
| 
      
 80 
     | 
    
         
            +
            void     vector_inspect_f(VectorF*);
         
     | 
| 
      
 81 
     | 
    
         
            +
            VectorI  *vector_create_i(Arena*, uint32_t);
         
     | 
| 
      
 82 
     | 
    
         
            +
            VectorI  *vector_dup_i(VectorI*, Arena*);
         
     | 
| 
      
 83 
     | 
    
         
            +
            void     vector_set_i(VectorI*, uint32_t offset, uint32_t value);
         
     | 
| 
      
 84 
     | 
    
         
            +
            uint32_t vector_get_i(VectorI*, uint32_t offset);
         
     | 
| 
      
 85 
     | 
    
         
            +
            void     vector_downsize_i(VectorI*, uint32_t);
         
     | 
| 
      
 86 
     | 
    
         
            +
            void     vector_inspect_i(VectorI*);
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
            LDouble      dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
       87 
89 
     | 
    
         
             
            void         fill_row(State, uint32_t, uint32_t, uint32_t);
         
     | 
| 
       88 
90 
     | 
    
         
             
            void         smawk(State, RowParams, VectorI*);
         
     | 
| 
       89 
91 
     | 
    
         
             
            void         find_min_from_candidates(State, RowParams, VectorI*);
         
     | 
| 
       90 
     | 
    
         
            -
            VectorI 
     | 
| 
      
 92 
     | 
    
         
            +
            VectorI      *prune_candidates(State, RowParams, VectorI*);
         
     | 
| 
       91 
93 
     | 
    
         
             
            void         fill_even_positions(State, RowParams, VectorI*);
         
     | 
| 
       92 
94 
     | 
    
         
             
            SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
         
     | 
| 
       93 
     | 
    
         
            -
            VectorI 
     | 
| 
      
 95 
     | 
    
         
            +
            VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
         
     | 
| 
       94 
96 
     | 
    
         
             
            uint32_t     find_koptimal(State);
         
     | 
| 
       95 
97 
     | 
    
         | 
| 
       96 
98 
     | 
    
         
             
            void Init_extensions(void) {
         
     | 
| 
       97 
     | 
    
         
            -
                VALUE ckmeans_module 
     | 
| 
      
 99 
     | 
    
         
            +
                VALUE ckmeans_module  = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
         
     | 
| 
       98 
100 
     | 
    
         
             
                VALUE clusterer_class = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
         
     | 
| 
       99 
101 
     | 
    
         | 
| 
       100 
102 
     | 
    
         
             
                rb_define_private_method(clusterer_class, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
         
     | 
| 
       101 
103 
     | 
    
         
             
            }
         
     | 
| 
       102 
104 
     | 
    
         | 
| 
       103 
     | 
    
         
            -
            # define ARENA_MIN_CAPACITY  
     | 
| 
       104 
     | 
    
         
            -
            # define ALLOCATION_FACTOR 20
         
     | 
| 
      
 105 
     | 
    
         
            +
            # define ARENA_MIN_CAPACITY 100
         
     | 
| 
       105 
106 
     | 
    
         
             
            # define PIx2 (M_PI * 2.0)
         
     | 
| 
       106 
107 
     | 
    
         | 
| 
       107 
     | 
    
         
            -
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
                 
     | 
| 
       110 
     | 
    
         
            -
                 
     | 
| 
       111 
     | 
    
         
            -
                 
     | 
| 
       112 
     | 
    
         
            -
                 
     | 
| 
       113 
     | 
    
         
            -
                 
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
                 
     | 
| 
       116 
     | 
    
         
            -
             
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
      
 108 
     | 
    
         
            +
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
         
     | 
| 
      
 109 
     | 
    
         
            +
            {
         
     | 
| 
      
 110 
     | 
    
         
            +
                uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
         
     | 
| 
      
 111 
     | 
    
         
            +
                uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
         
     | 
| 
      
 112 
     | 
    
         
            +
                uint32_t kmax        = NUM2UINT(rb_iv_get(self, "@kmax"));
         
     | 
| 
      
 113 
     | 
    
         
            +
                bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
         
     | 
| 
      
 114 
     | 
    
         
            +
                VALUE rb_xsorted     = rb_iv_get(self, "@xsorted");
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
                Arena *arena         =
         
     | 
| 
      
 117 
     | 
    
         
            +
                    arena_create(
         
     | 
| 
      
 118 
     | 
    
         
            +
                        sizeof(LDouble) * xcount * (kmax + 4) +
         
     | 
| 
      
 119 
     | 
    
         
            +
                        sizeof(uint32_t) * xcount * kmax * 5 +
         
     | 
| 
      
 120 
     | 
    
         
            +
                        ARENA_MIN_CAPACITY
         
     | 
| 
      
 121 
     | 
    
         
            +
                    );
         
     | 
| 
       118 
122 
     | 
    
         | 
| 
       119 
123 
     | 
    
         
             
                if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
         
     | 
| 
       120 
124 
     | 
    
         | 
| 
         @@ -125,7 +129,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       125 
129 
     | 
    
         
             
                VectorF *xsumsq  = vector_create_f(arena, xcount);
         
     | 
| 
       126 
130 
     | 
    
         | 
| 
       127 
131 
     | 
    
         
             
                for (uint32_t i = 0; i < xcount; i++) {
         
     | 
| 
       128 
     | 
    
         
            -
                     
     | 
| 
      
 132 
     | 
    
         
            +
                    LDouble xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
         
     | 
| 
       129 
133 
     | 
    
         
             
                    vector_set_f(xsorted, i, xi);
         
     | 
| 
       130 
134 
     | 
    
         
             
                }
         
     | 
| 
       131 
135 
     | 
    
         | 
| 
         @@ -143,17 +147,17 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       143 
147 
     | 
    
         
             
                };
         
     | 
| 
       144 
148 
     | 
    
         | 
| 
       145 
149 
     | 
    
         | 
| 
       146 
     | 
    
         
            -
                 
     | 
| 
       147 
     | 
    
         
            -
                 
     | 
| 
      
 150 
     | 
    
         
            +
                LDouble shift        = vector_get_f(xsorted, xcount / 2);
         
     | 
| 
      
 151 
     | 
    
         
            +
                LDouble diff_initial = vector_get_f(xsorted, 0) - shift;
         
     | 
| 
       148 
152 
     | 
    
         | 
| 
       149 
153 
     | 
    
         
             
                vector_set_f(xsum, 0, diff_initial);
         
     | 
| 
       150 
154 
     | 
    
         
             
                vector_set_f(xsumsq, 0, diff_initial * diff_initial);
         
     | 
| 
       151 
155 
     | 
    
         | 
| 
       152 
156 
     | 
    
         
             
                for (uint32_t i = 1; i < xcount; i++) {
         
     | 
| 
       153 
     | 
    
         
            -
                     
     | 
| 
       154 
     | 
    
         
            -
                     
     | 
| 
       155 
     | 
    
         
            -
                     
     | 
| 
       156 
     | 
    
         
            -
                     
     | 
| 
      
 157 
     | 
    
         
            +
                    LDouble xi          = vector_get_f(xsorted, i);
         
     | 
| 
      
 158 
     | 
    
         
            +
                    LDouble xsum_prev   = vector_get_f(xsum, i - 1);
         
     | 
| 
      
 159 
     | 
    
         
            +
                    LDouble xsumsq_prev = vector_get_f(xsumsq, i - 1);
         
     | 
| 
      
 160 
     | 
    
         
            +
                    LDouble diff        = xi - shift;
         
     | 
| 
       157 
161 
     | 
    
         | 
| 
       158 
162 
     | 
    
         
             
                    vector_set_f(xsum, i, xsum_prev + diff);
         
     | 
| 
       159 
163 
     | 
    
         
             
                    vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
         
     | 
| 
         @@ -168,7 +172,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       168 
172 
     | 
    
         | 
| 
       169 
173 
     | 
    
         
             
                uint32_t koptimal = find_koptimal(state);
         
     | 
| 
       170 
174 
     | 
    
         | 
| 
       171 
     | 
    
         
            -
                VectorI *sizes =  
     | 
| 
      
 175 
     | 
    
         
            +
                VectorI *sizes = vector_create_i(arena, koptimal);
         
     | 
| 
      
 176 
     | 
    
         
            +
                backtrack_sizes(state, sizes, koptimal);
         
     | 
| 
       172 
177 
     | 
    
         | 
| 
       173 
178 
     | 
    
         
             
                /* printf("XSORTED \t"); vector_inspect_f(xsorted); */
         
     | 
| 
       174 
179 
     | 
    
         
             
                /* printf("K OPTIMAL: %lld\n", koptimal); */
         
     | 
| 
         @@ -176,8 +181,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       176 
181 
     | 
    
         
             
                /* printf("FINAL COST\n"); matrix_inspect_f(cost); */
         
     | 
| 
       177 
182 
     | 
    
         
             
                /* printf("FINAL SPLITS\n"); matrix_inspect_i(splits); */
         
     | 
| 
       178 
183 
     | 
    
         | 
| 
       179 
     | 
    
         
            -
                VALUE response = rb_ary_new2(sizes-> 
     | 
| 
       180 
     | 
    
         
            -
                for (uint32_t i = 0; i < sizes-> 
     | 
| 
      
 184 
     | 
    
         
            +
                VALUE response = rb_ary_new2(sizes->size);
         
     | 
| 
      
 185 
     | 
    
         
            +
                for (uint32_t i = 0; i < sizes->size; i++) {
         
     | 
| 
       181 
186 
     | 
    
         
             
                    VALUE size = LONG2NUM(vector_get_i(sizes, i));
         
     | 
| 
       182 
187 
     | 
    
         
             
                    rb_ary_store(response, i, size);
         
     | 
| 
       183 
188 
     | 
    
         
             
                }
         
     | 
| 
         @@ -189,29 +194,30 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       189 
194 
     | 
    
         | 
| 
       190 
195 
     | 
    
         
             
            uint32_t find_koptimal(State state)
         
     | 
| 
       191 
196 
     | 
    
         
             
            {
         
     | 
| 
       192 
     | 
    
         
            -
                uint32_t kmin 
     | 
| 
       193 
     | 
    
         
            -
                uint32_t kmax 
     | 
| 
       194 
     | 
    
         
            -
                uint32_t xcount 
     | 
| 
       195 
     | 
    
         
            -
                uint32_t kopt 
     | 
| 
       196 
     | 
    
         
            -
                uint32_t xindex_max 
     | 
| 
       197 
     | 
    
         
            -
                VectorF *xsorted 
     | 
| 
       198 
     | 
    
         
            -
                 
     | 
| 
       199 
     | 
    
         
            -
                 
     | 
| 
       200 
     | 
    
         
            -
                 
     | 
| 
       201 
     | 
    
         
            -
                 
     | 
| 
       202 
     | 
    
         
            -
             
     | 
| 
      
 197 
     | 
    
         
            +
                uint32_t kmin       = state.kmin;
         
     | 
| 
      
 198 
     | 
    
         
            +
                uint32_t kmax       = state.kmax;
         
     | 
| 
      
 199 
     | 
    
         
            +
                uint32_t xcount     = state.xcount;
         
     | 
| 
      
 200 
     | 
    
         
            +
                uint32_t kopt       = kmin;
         
     | 
| 
      
 201 
     | 
    
         
            +
                uint32_t xindex_max = state.xcount - 1;
         
     | 
| 
      
 202 
     | 
    
         
            +
                VectorF *xsorted    = state.xsorted;
         
     | 
| 
      
 203 
     | 
    
         
            +
                LDouble x0          = vector_get_f(xsorted, 0);
         
     | 
| 
      
 204 
     | 
    
         
            +
                LDouble xn          = vector_get_f(xsorted, xindex_max);
         
     | 
| 
      
 205 
     | 
    
         
            +
                LDouble max_bic     = 0.0;
         
     | 
| 
      
 206 
     | 
    
         
            +
                LDouble xcount_log  = log((LDouble) xcount);
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
                VectorI *sizes = vector_create_i(state.arena, kmax);
         
     | 
| 
       203 
209 
     | 
    
         
             
                for (uint32_t k = kmin; k <= kmax; k++) {
         
     | 
| 
       204 
210 
     | 
    
         
             
                    uint32_t index_right, index_left = 0;
         
     | 
| 
       205 
     | 
    
         
            -
                     
     | 
| 
       206 
     | 
    
         
            -
                     
     | 
| 
      
 211 
     | 
    
         
            +
                    LDouble bin_left, bin_right, loglikelihood = 0.0;
         
     | 
| 
      
 212 
     | 
    
         
            +
                    backtrack_sizes(state, sizes, k);
         
     | 
| 
       207 
213 
     | 
    
         | 
| 
       208 
214 
     | 
    
         
             
                    for (uint32_t kb = 0; kb < k; kb++) {
         
     | 
| 
       209 
     | 
    
         
            -
                        uint32_t npoints 
     | 
| 
       210 
     | 
    
         
            -
                        index_right 
     | 
| 
       211 
     | 
    
         
            -
                         
     | 
| 
       212 
     | 
    
         
            -
                         
     | 
| 
       213 
     | 
    
         
            -
                        bin_left 
     | 
| 
       214 
     | 
    
         
            -
                        bin_right 
     | 
| 
      
 215 
     | 
    
         
            +
                        uint32_t npoints = vector_get_i(sizes, kb);
         
     | 
| 
      
 216 
     | 
    
         
            +
                        index_right      = index_left + npoints - 1;
         
     | 
| 
      
 217 
     | 
    
         
            +
                        LDouble xleft    = vector_get_f(xsorted, index_left);
         
     | 
| 
      
 218 
     | 
    
         
            +
                        LDouble xright   = vector_get_f(xsorted, index_right);
         
     | 
| 
      
 219 
     | 
    
         
            +
                        bin_left         = xleft;
         
     | 
| 
      
 220 
     | 
    
         
            +
                        bin_right        = xright;
         
     | 
| 
       215 
221 
     | 
    
         | 
| 
       216 
222 
     | 
    
         
             
                        if (xleft == xright) {
         
     | 
| 
       217 
223 
     | 
    
         
             
                            bin_left  = index_left == 0
         
     | 
| 
         @@ -222,18 +228,18 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       222 
228 
     | 
    
         
             
                                : xn;
         
     | 
| 
       223 
229 
     | 
    
         
             
                        }
         
     | 
| 
       224 
230 
     | 
    
         | 
| 
       225 
     | 
    
         
            -
                         
     | 
| 
       226 
     | 
    
         
            -
                        SegmentStats stats 
     | 
| 
       227 
     | 
    
         
            -
                         
     | 
| 
       228 
     | 
    
         
            -
                         
     | 
| 
      
 231 
     | 
    
         
            +
                        LDouble bin_width  = bin_right - bin_left;
         
     | 
| 
      
 232 
     | 
    
         
            +
                        SegmentStats stats = shifted_data_variance(xsorted, index_left, index_right);
         
     | 
| 
      
 233 
     | 
    
         
            +
                        LDouble mean       = stats.mean;
         
     | 
| 
      
 234 
     | 
    
         
            +
                        LDouble variance   = stats.variance;
         
     | 
| 
       229 
235 
     | 
    
         | 
| 
       230 
236 
     | 
    
         
             
                        if (variance > 0) {
         
     | 
| 
       231 
237 
     | 
    
         
             
                            for (uint32_t i = index_left; i <= index_right; i++) {
         
     | 
| 
       232 
     | 
    
         
            -
                                 
     | 
| 
      
 238 
     | 
    
         
            +
                                LDouble xi     = vector_get_f(xsorted, i);
         
     | 
| 
       233 
239 
     | 
    
         
             
                                loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
         
     | 
| 
       234 
240 
     | 
    
         
             
                            }
         
     | 
| 
       235 
241 
     | 
    
         
             
                            loglikelihood += npoints * (
         
     | 
| 
       236 
     | 
    
         
            -
                                (log(npoints / ( 
     | 
| 
      
 242 
     | 
    
         
            +
                                (state.apply_deviation ? 0.0 : log(npoints / (LDouble) xcount)) -
         
     | 
| 
       237 
243 
     | 
    
         
             
                                (0.5 * log(PIx2 * variance))
         
     | 
| 
       238 
244 
     | 
    
         
             
                            );
         
     | 
| 
       239 
245 
     | 
    
         
             
                        } else {
         
     | 
| 
         @@ -243,24 +249,23 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       243 
249 
     | 
    
         
             
                        index_left = index_right + 1;
         
     | 
| 
       244 
250 
     | 
    
         
             
                    }
         
     | 
| 
       245 
251 
     | 
    
         | 
| 
       246 
     | 
    
         
            -
                     
     | 
| 
      
 252 
     | 
    
         
            +
                    LDouble bic = (2.0 * loglikelihood) - (((3 * k) - 1) * xcount_log);
         
     | 
| 
       247 
253 
     | 
    
         | 
| 
       248 
254 
     | 
    
         
             
                    if (k == kmin) {
         
     | 
| 
       249 
255 
     | 
    
         
             
                        max_bic = bic;
         
     | 
| 
       250 
     | 
    
         
            -
                        kopt 
     | 
| 
      
 256 
     | 
    
         
            +
                        kopt    = kmin;
         
     | 
| 
       251 
257 
     | 
    
         
             
                    } else if (bic > max_bic) {
         
     | 
| 
       252 
258 
     | 
    
         
             
                        max_bic = bic;
         
     | 
| 
       253 
     | 
    
         
            -
                        kopt 
     | 
| 
      
 259 
     | 
    
         
            +
                        kopt    = k;
         
     | 
| 
       254 
260 
     | 
    
         
             
                    }
         
     | 
| 
       255 
261 
     | 
    
         
             
                }
         
     | 
| 
       256 
262 
     | 
    
         | 
| 
       257 
263 
     | 
    
         
             
                return kopt;
         
     | 
| 
       258 
264 
     | 
    
         
             
            }
         
     | 
| 
       259 
265 
     | 
    
         | 
| 
       260 
     | 
    
         
            -
            VectorI *backtrack_sizes(State state, uint32_t k)
         
     | 
| 
      
 266 
     | 
    
         
            +
            VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
         
     | 
| 
       261 
267 
     | 
    
         
             
            {
         
     | 
| 
       262 
268 
     | 
    
         
             
                MatrixI *splits = state.splits;
         
     | 
| 
       263 
     | 
    
         
            -
                VectorI *sizes  = vector_create_i(state.arena, k);
         
     | 
| 
       264 
269 
     | 
    
         
             
                uint32_t xcount = state.xcount;
         
     | 
| 
       265 
270 
     | 
    
         
             
                uint32_t right  = xcount - 1;
         
     | 
| 
       266 
271 
     | 
    
         
             
                uint32_t left   = 0;
         
     | 
| 
         @@ -280,15 +285,15 @@ VectorI *backtrack_sizes(State state, uint32_t k) 
     | 
|
| 
       280 
285 
     | 
    
         
             
            SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t right)
         
     | 
| 
       281 
286 
     | 
    
         
             
            {
         
     | 
| 
       282 
287 
     | 
    
         
             
                const uint32_t n   = right - left + 1;
         
     | 
| 
       283 
     | 
    
         
            -
                 
     | 
| 
       284 
     | 
    
         
            -
                 
     | 
| 
      
 288 
     | 
    
         
            +
                LDouble sum        = 0.0;
         
     | 
| 
      
 289 
     | 
    
         
            +
                LDouble sumsq      = 0.0;
         
     | 
| 
       285 
290 
     | 
    
         
             
                SegmentStats stats = { .mean = 0.0, .variance = 0.0 };
         
     | 
| 
       286 
291 
     | 
    
         | 
| 
       287 
292 
     | 
    
         
             
                if (right >= left) {
         
     | 
| 
       288 
     | 
    
         
            -
                    const  
     | 
| 
      
 293 
     | 
    
         
            +
                    const LDouble median = vector_get_f(xsorted, (left + right) / 2);
         
     | 
| 
       289 
294 
     | 
    
         | 
| 
       290 
295 
     | 
    
         
             
                    for (uint32_t i = left; i <= right; i++) {
         
     | 
| 
       291 
     | 
    
         
            -
                        const  
     | 
| 
      
 296 
     | 
    
         
            +
                        const LDouble sumi = vector_get_f(xsorted, i) - median;
         
     | 
| 
       292 
297 
     | 
    
         | 
| 
       293 
298 
     | 
    
         
             
                        sum   += sumi;
         
     | 
| 
       294 
299 
     | 
    
         
             
                        sumsq += sumi * sumi;
         
     | 
| 
         @@ -341,7 +346,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       341 
346 
     | 
    
         
             
                uint32_t imin    = rparams.imin;
         
     | 
| 
       342 
347 
     | 
    
         
             
                uint32_t imax    = rparams.imax;
         
     | 
| 
       343 
348 
     | 
    
         
             
                uint32_t istep   = rparams.istep;
         
     | 
| 
       344 
     | 
    
         
            -
                uint32_t n       = split_candidates-> 
     | 
| 
      
 349 
     | 
    
         
            +
                uint32_t n       = split_candidates->size;
         
     | 
| 
       345 
350 
     | 
    
         
             
                uint32_t istepx2 = istep * 2;
         
     | 
| 
       346 
351 
     | 
    
         
             
                uint32_t jl      = vector_get_i(split_candidates, 0);
         
     | 
| 
       347 
352 
     | 
    
         
             
                VectorF *xsum    = state.xsum;
         
     | 
| 
         @@ -351,22 +356,22 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       351 
356 
     | 
    
         
             
                for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         
     | 
| 
       352 
357 
     | 
    
         
             
                    while (vector_get_i(split_candidates, r) < jl) r++;
         
     | 
| 
       353 
358 
     | 
    
         | 
| 
       354 
     | 
    
         
            -
                    uint32_t rcandidate 
     | 
| 
      
 359 
     | 
    
         
            +
                    uint32_t rcandidate    = vector_get_i(split_candidates, r);
         
     | 
| 
       355 
360 
     | 
    
         
             
                    uint32_t cost_base_row = row - 1;
         
     | 
| 
       356 
361 
     | 
    
         
             
                    uint32_t cost_base_col = rcandidate - 1;
         
     | 
| 
       357 
     | 
    
         
            -
                     
     | 
| 
      
 362 
     | 
    
         
            +
                    LDouble cost           =
         
     | 
| 
       358 
363 
     | 
    
         
             
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissimilarity(rcandidate, i, xsum, xsumsq);
         
     | 
| 
       359 
364 
     | 
    
         | 
| 
       360 
365 
     | 
    
         
             
                    matrix_set_f(state.cost, row, i, cost);
         
     | 
| 
       361 
366 
     | 
    
         
             
                    matrix_set_i(state.splits, row, i, rcandidate);
         
     | 
| 
       362 
367 
     | 
    
         | 
| 
       363 
     | 
    
         
            -
                    uint32_t jh 
     | 
| 
      
 368 
     | 
    
         
            +
                    uint32_t jh =
         
     | 
| 
       364 
369 
     | 
    
         
             
                        (i + istep) <= imax
         
     | 
| 
       365 
370 
     | 
    
         
             
                        ? matrix_get_i(splits, row, i + istep)
         
     | 
| 
       366 
371 
     | 
    
         
             
                        : vector_get_i(split_candidates, n - 1);
         
     | 
| 
       367 
372 
     | 
    
         | 
| 
       368 
     | 
    
         
            -
                    uint32_t jmax 
     | 
| 
       369 
     | 
    
         
            -
                     
     | 
| 
      
 373 
     | 
    
         
            +
                    uint32_t jmax  = jh < i ? jh : i;
         
     | 
| 
      
 374 
     | 
    
         
            +
                    LDouble sjimin = dissimilarity(jmax, i, xsum, xsumsq);
         
     | 
| 
       370 
375 
     | 
    
         | 
| 
       371 
376 
     | 
    
         
             
                    for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
         
     | 
| 
       372 
377 
     | 
    
         
             
                        uint32_t jabs = vector_get_i(split_candidates, r);
         
     | 
| 
         @@ -374,9 +379,9 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       374 
379 
     | 
    
         
             
                        if (jabs > i) break;
         
     | 
| 
       375 
380 
     | 
    
         
             
                        if (jabs < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       376 
381 
     | 
    
         | 
| 
       377 
     | 
    
         
            -
                         
     | 
| 
       378 
     | 
    
         
            -
                         
     | 
| 
       379 
     | 
    
         
            -
                         
     | 
| 
      
 382 
     | 
    
         
            +
                        LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
         
     | 
| 
      
 383 
     | 
    
         
            +
                        LDouble sj        = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
         
     | 
| 
      
 384 
     | 
    
         
            +
                        LDouble cost_prev = matrix_get_f(state.cost, row, i);
         
     | 
| 
       380 
385 
     | 
    
         | 
| 
       381 
386 
     | 
    
         
             
                        if (sj <= cost_prev) {
         
     | 
| 
       382 
387 
     | 
    
         
             
                            matrix_set_f(state.cost, row, i, sj);
         
     | 
| 
         @@ -407,19 +412,19 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       407 
412 
     | 
    
         
             
                    const uint32_t optimal_split_idx = optimal_split_idx_prev;
         
     | 
| 
       408 
413 
     | 
    
         
             
                    const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         
     | 
| 
       409 
414 
     | 
    
         
             
                    const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
         
     | 
| 
       410 
     | 
    
         
            -
                    const  
     | 
| 
      
 415 
     | 
    
         
            +
                    const LDouble added_cost         = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
         
     | 
| 
       411 
416 
     | 
    
         | 
| 
       412 
417 
     | 
    
         
             
                    matrix_set_f(cost, row, i, cost_prev + added_cost);
         
     | 
| 
       413 
418 
     | 
    
         
             
                    matrix_set_i(splits, row, i, optimal_split);
         
     | 
| 
       414 
419 
     | 
    
         | 
| 
       415 
     | 
    
         
            -
                    for (uint32_t r = optimal_split_idx + 1; r < split_candidates-> 
     | 
| 
      
 420 
     | 
    
         
            +
                    for (uint32_t r = optimal_split_idx + 1; r < split_candidates->size; r++)
         
     | 
| 
       416 
421 
     | 
    
         
             
                    {
         
     | 
| 
       417 
422 
     | 
    
         
             
                        uint32_t split = vector_get_i(split_candidates, r);
         
     | 
| 
       418 
423 
     | 
    
         | 
| 
       419 
424 
     | 
    
         
             
                        if (split < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       420 
425 
     | 
    
         
             
                        if (split > i) break;
         
     | 
| 
       421 
426 
     | 
    
         | 
| 
       422 
     | 
    
         
            -
                         
     | 
| 
      
 427 
     | 
    
         
            +
                        LDouble split_cost =
         
     | 
| 
       423 
428 
     | 
    
         
             
                            matrix_get_f(cost, row - 1, split - 1) + dissimilarity(split, i, state.xsum, state.xsumsq);
         
     | 
| 
       424 
429 
     | 
    
         | 
| 
       425 
430 
     | 
    
         
             
                        if (split_cost > matrix_get_f(cost, row, i)) continue;
         
     | 
| 
         @@ -437,7 +442,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       437 
442 
     | 
    
         
             
                uint32_t row   = rparams.row;
         
     | 
| 
       438 
443 
     | 
    
         
             
                uint32_t istep = rparams.istep;
         
     | 
| 
       439 
444 
     | 
    
         
             
                uint32_t n     = ((rparams.imax - imin) / istep) + 1;
         
     | 
| 
       440 
     | 
    
         
            -
                uint32_t m     = split_candidates-> 
     | 
| 
      
 445 
     | 
    
         
            +
                uint32_t m     = split_candidates->size;
         
     | 
| 
       441 
446 
     | 
    
         | 
| 
       442 
447 
     | 
    
         
             
                if (n >= m) return split_candidates;
         
     | 
| 
       443 
448 
     | 
    
         | 
| 
         @@ -447,12 +452,12 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       447 
452 
     | 
    
         | 
| 
       448 
453 
     | 
    
         
             
                while (m > n)
         
     | 
| 
       449 
454 
     | 
    
         
             
                {
         
     | 
| 
       450 
     | 
    
         
            -
                    uint32_t i 
     | 
| 
       451 
     | 
    
         
            -
                    uint32_t j 
     | 
| 
       452 
     | 
    
         
            -
                    uint32_t jnext 
     | 
| 
       453 
     | 
    
         
            -
                     
     | 
| 
      
 455 
     | 
    
         
            +
                    uint32_t i     = imin + left * istep;
         
     | 
| 
      
 456 
     | 
    
         
            +
                    uint32_t j     = vector_get_i(pruned, right);
         
     | 
| 
      
 457 
     | 
    
         
            +
                    uint32_t jnext = vector_get_i(pruned, right + 1);
         
     | 
| 
      
 458 
     | 
    
         
            +
                    LDouble sl     =
         
     | 
| 
       454 
459 
     | 
    
         
             
                        matrix_get_f(state.cost, row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
         
     | 
| 
       455 
     | 
    
         
            -
                     
     | 
| 
      
 460 
     | 
    
         
            +
                    LDouble snext  =
         
     | 
| 
       456 
461 
     | 
    
         
             
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
         
     | 
| 
       457 
462 
     | 
    
         | 
| 
       458 
463 
     | 
    
         
             
                    if ((sl < snext) && (left < n - 1)) {
         
     | 
| 
         @@ -483,86 +488,86 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       483 
488 
     | 
    
         
             
                return pruned;
         
     | 
| 
       484 
489 
     | 
    
         
             
            }
         
     | 
| 
       485 
490 
     | 
    
         | 
| 
       486 
     | 
    
         
            -
             
     | 
| 
       487 
     | 
    
         
            -
                 
     | 
| 
      
 491 
     | 
    
         
            +
            inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
         
     | 
| 
      
 492 
     | 
    
         
            +
                LDouble sji = 0.0;
         
     | 
| 
       488 
493 
     | 
    
         | 
| 
       489 
494 
     | 
    
         
             
                if (j >= i) return sji;
         
     | 
| 
       490 
495 
     | 
    
         | 
| 
       491 
496 
     | 
    
         
             
                if (j > 0) {
         
     | 
| 
       492 
     | 
    
         
            -
                     
     | 
| 
       493 
     | 
    
         
            -
                    uint32_t segment_size 
     | 
| 
       494 
     | 
    
         
            -
                    sji 
     | 
| 
      
 497 
     | 
    
         
            +
                    LDouble segment_diff  = vector_get_diff_f(xsum, i, j - 1);
         
     | 
| 
      
 498 
     | 
    
         
            +
                    uint32_t segment_size = i - j + 1;
         
     | 
| 
      
 499 
     | 
    
         
            +
                    sji                   = vector_get_diff_f(xsumsq, i, j - 1) - (segment_diff * segment_diff / segment_size);
         
     | 
| 
       495 
500 
     | 
    
         
             
                } else {
         
     | 
| 
       496 
     | 
    
         
            -
                     
     | 
| 
       497 
     | 
    
         
            -
                    sji 
     | 
| 
      
 501 
     | 
    
         
            +
                    LDouble xsumi = vector_get_f(xsum, i);
         
     | 
| 
      
 502 
     | 
    
         
            +
                    sji           = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
         
     | 
| 
       498 
503 
     | 
    
         
             
                }
         
     | 
| 
       499 
504 
     | 
    
         | 
| 
       500 
505 
     | 
    
         
             
                return (sji > 0) ? sji : 0.0;
         
     | 
| 
       501 
506 
     | 
    
         
             
            }
         
     | 
| 
       502 
507 
     | 
    
         | 
| 
       503 
     | 
    
         
            -
            VectorF *vector_create_f(Arena *arena, uint32_t  
     | 
| 
      
 508 
     | 
    
         
            +
            inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
         
     | 
| 
       504 
509 
     | 
    
         
             
                VectorF *v;
         
     | 
| 
       505 
510 
     | 
    
         | 
| 
       506 
     | 
    
         
            -
                v 
     | 
| 
       507 
     | 
    
         
            -
                v->values 
     | 
| 
       508 
     | 
    
         
            -
                v-> 
     | 
| 
      
 511 
     | 
    
         
            +
                v         = arena_alloc(arena, sizeof(*v));
         
     | 
| 
      
 512 
     | 
    
         
            +
                v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
         
     | 
| 
      
 513 
     | 
    
         
            +
                v->size   = size;
         
     | 
| 
       509 
514 
     | 
    
         | 
| 
       510 
515 
     | 
    
         
             
                return v;
         
     | 
| 
       511 
516 
     | 
    
         
             
            }
         
     | 
| 
       512 
517 
     | 
    
         | 
| 
       513 
     | 
    
         
            -
            VectorI *vector_create_i(Arena *arena, uint32_t  
     | 
| 
      
 518 
     | 
    
         
            +
            inline VectorI *vector_create_i(Arena *arena, uint32_t size) {
         
     | 
| 
       514 
519 
     | 
    
         
             
                VectorI *v;
         
     | 
| 
       515 
520 
     | 
    
         | 
| 
       516 
     | 
    
         
            -
                v 
     | 
| 
       517 
     | 
    
         
            -
                v->values 
     | 
| 
       518 
     | 
    
         
            -
                v-> 
     | 
| 
      
 521 
     | 
    
         
            +
                v         = arena_alloc(arena, sizeof(*v));
         
     | 
| 
      
 522 
     | 
    
         
            +
                v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
         
     | 
| 
      
 523 
     | 
    
         
            +
                v->size   = size;
         
     | 
| 
       519 
524 
     | 
    
         | 
| 
       520 
525 
     | 
    
         
             
                return v;
         
     | 
| 
       521 
526 
     | 
    
         
             
            }
         
     | 
| 
       522 
527 
     | 
    
         | 
| 
       523 
     | 
    
         
            -
            VectorI *vector_dup_i(VectorI *v, Arena *arena)
         
     | 
| 
      
 528 
     | 
    
         
            +
            inline VectorI *vector_dup_i(VectorI *v, Arena *arena)
         
     | 
| 
       524 
529 
     | 
    
         
             
            {
         
     | 
| 
       525 
     | 
    
         
            -
                VectorI *vdup = vector_create_i(arena, v-> 
     | 
| 
      
 530 
     | 
    
         
            +
                VectorI *vdup = vector_create_i(arena, v->size);
         
     | 
| 
       526 
531 
     | 
    
         | 
| 
       527 
     | 
    
         
            -
                memcpy(vdup->values, v->values, sizeof(*(v->values)) * v-> 
     | 
| 
      
 532 
     | 
    
         
            +
                memcpy(vdup->values, v->values, sizeof(*(v->values)) * v->size);
         
     | 
| 
       528 
533 
     | 
    
         | 
| 
       529 
534 
     | 
    
         
             
                return vdup;
         
     | 
| 
       530 
535 
     | 
    
         
             
            }
         
     | 
| 
       531 
536 
     | 
    
         | 
| 
       532 
     | 
    
         
            -
            void vector_set_f(VectorF *v, uint32_t offset,  
     | 
| 
      
 537 
     | 
    
         
            +
            inline void vector_set_f(VectorF *v, uint32_t offset, LDouble value) {
         
     | 
| 
       533 
538 
     | 
    
         
             
                *(v->values + offset) = value;
         
     | 
| 
       534 
539 
     | 
    
         
             
            }
         
     | 
| 
       535 
540 
     | 
    
         | 
| 
       536 
     | 
    
         
            -
            void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
         
     | 
| 
      
 541 
     | 
    
         
            +
            inline void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
         
     | 
| 
       537 
542 
     | 
    
         
             
                *(v->values + offset) = value;
         
     | 
| 
       538 
543 
     | 
    
         
             
            }
         
     | 
| 
       539 
544 
     | 
    
         | 
| 
       540 
     | 
    
         
            -
            uint32_t vector_get_i(VectorI *v, uint32_t offset) {
         
     | 
| 
      
 545 
     | 
    
         
            +
            inline uint32_t vector_get_i(VectorI *v, uint32_t offset) {
         
     | 
| 
       541 
546 
     | 
    
         
             
                return *(v->values + offset);
         
     | 
| 
       542 
547 
     | 
    
         
             
            }
         
     | 
| 
       543 
548 
     | 
    
         | 
| 
       544 
     | 
    
         
            -
            void vector_downsize_i(VectorI *v, uint32_t new_size) {
         
     | 
| 
       545 
     | 
    
         
            -
                v-> 
     | 
| 
      
 549 
     | 
    
         
            +
            inline void vector_downsize_i(VectorI *v, uint32_t new_size) {
         
     | 
| 
      
 550 
     | 
    
         
            +
                v->size = new_size;
         
     | 
| 
       546 
551 
     | 
    
         
             
            }
         
     | 
| 
       547 
552 
     | 
    
         | 
| 
       548 
553 
     | 
    
         
             
            void vector_inspect_i(VectorI *v) {
         
     | 
| 
       549 
     | 
    
         
            -
                for (uint32_t i = 0; i < v-> 
     | 
| 
      
 554 
     | 
    
         
            +
                for (uint32_t i = 0; i < v->size - 1; i++)
         
     | 
| 
       550 
555 
     | 
    
         
             
                    printf("%u, ", vector_get_i(v, i));
         
     | 
| 
       551 
     | 
    
         
            -
                printf("%u\n", vector_get_i(v, v-> 
     | 
| 
      
 556 
     | 
    
         
            +
                printf("%u\n", vector_get_i(v, v->size - 1));
         
     | 
| 
       552 
557 
     | 
    
         
             
            }
         
     | 
| 
       553 
558 
     | 
    
         | 
| 
       554 
     | 
    
         
            -
             
     | 
| 
      
 559 
     | 
    
         
            +
            inline LDouble vector_get_f(VectorF *v, uint32_t offset) {
         
     | 
| 
       555 
560 
     | 
    
         
             
                return *(v->values + offset);
         
     | 
| 
       556 
561 
     | 
    
         
             
            }
         
     | 
| 
       557 
562 
     | 
    
         | 
| 
       558 
     | 
    
         
            -
             
     | 
| 
      
 563 
     | 
    
         
            +
            inline LDouble vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
         
     | 
| 
       559 
564 
     | 
    
         
             
                return *(v->values + i) - *(v->values + j);
         
     | 
| 
       560 
565 
     | 
    
         
             
            }
         
     | 
| 
       561 
566 
     | 
    
         | 
| 
       562 
567 
     | 
    
         
             
            void vector_inspect_f(VectorF *v) {
         
     | 
| 
       563 
     | 
    
         
            -
                for (uint32_t i = 0; i < v-> 
     | 
| 
      
 568 
     | 
    
         
            +
                for (uint32_t i = 0; i < v->size - 1; i++)
         
     | 
| 
       564 
569 
     | 
    
         
             
                    printf("%Lf, ", vector_get_f(v, i));
         
     | 
| 
       565 
     | 
    
         
            -
                printf("%Lf\n", vector_get_f(v, v-> 
     | 
| 
      
 570 
     | 
    
         
            +
                printf("%Lf\n", vector_get_f(v, v->size - 1));
         
     | 
| 
       566 
571 
     | 
    
         
             
            }
         
     | 
| 
       567 
572 
     | 
    
         | 
| 
       568 
573 
     | 
    
         
             
            MatrixF *matrix_create_f(Arena *arena, uint32_t nrows, uint32_t ncols) {
         
     | 
| 
         @@ -587,12 +592,12 @@ MatrixI *matrix_create_i(Arena *arena, uint32_t nrows, uint32_t ncols) { 
     | 
|
| 
       587 
592 
     | 
    
         
             
                return m;
         
     | 
| 
       588 
593 
     | 
    
         
             
            }
         
     | 
| 
       589 
594 
     | 
    
         | 
| 
       590 
     | 
    
         
            -
            void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j,  
     | 
| 
      
 595 
     | 
    
         
            +
            inline void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, LDouble value) {
         
     | 
| 
       591 
596 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       592 
597 
     | 
    
         
             
                *(m->values + offset) = value;
         
     | 
| 
       593 
598 
     | 
    
         
             
            }
         
     | 
| 
       594 
599 
     | 
    
         | 
| 
       595 
     | 
    
         
            -
             
     | 
| 
      
 600 
     | 
    
         
            +
            inline LDouble matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
         
     | 
| 
       596 
601 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       597 
602 
     | 
    
         
             
                return *(m->values + offset);
         
     | 
| 
       598 
603 
     | 
    
         
             
            }
         
     | 
| 
         @@ -600,7 +605,7 @@ long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) { 
     | 
|
| 
       600 
605 
     | 
    
         
             
            void matrix_inspect_f(MatrixF *m) {
         
     | 
| 
       601 
606 
     | 
    
         
             
                for (uint32_t i = 0; i < m->nrows; i++) {
         
     | 
| 
       602 
607 
     | 
    
         
             
                    for (uint32_t j = 0; j < m->ncols - 1; j++) {
         
     | 
| 
       603 
     | 
    
         
            -
                         
     | 
| 
      
 608 
     | 
    
         
            +
                        LDouble value = matrix_get_f(m, i, j);
         
     | 
| 
       604 
609 
     | 
    
         | 
| 
       605 
610 
     | 
    
         
             
                        printf("%Lf, ", value);
         
     | 
| 
       606 
611 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -616,17 +621,17 @@ void matrix_inspect_i(MatrixI *m) { 
     | 
|
| 
       616 
621 
     | 
    
         
             
                }
         
     | 
| 
       617 
622 
     | 
    
         
             
            }
         
     | 
| 
       618 
623 
     | 
    
         | 
| 
       619 
     | 
    
         
            -
            void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
         
     | 
| 
      
 624 
     | 
    
         
            +
            inline void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
         
     | 
| 
       620 
625 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       621 
626 
     | 
    
         
             
                *(m->values + offset) = value;
         
     | 
| 
       622 
627 
     | 
    
         
             
            }
         
     | 
| 
       623 
628 
     | 
    
         | 
| 
       624 
     | 
    
         
            -
            uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
         
     | 
| 
      
 629 
     | 
    
         
            +
            inline uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
         
     | 
| 
       625 
630 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       626 
631 
     | 
    
         
             
                return *(m->values + offset);
         
     | 
| 
       627 
632 
     | 
    
         
             
            }
         
     | 
| 
       628 
633 
     | 
    
         | 
| 
       629 
     | 
    
         
            -
            Arena *arena_create(uint32_t capacity) {
     | 
| 
      
 634 
     | 
    
         
            +
            Arena *arena_create(size_t capacity) {
         
     | 
| 
       630 
635 
     | 
    
         
             
                if (capacity < ARENA_MIN_CAPACITY) {
         
     | 
| 
       631 
636 
     | 
    
         
             
                    capacity = ARENA_MIN_CAPACITY;
         
     | 
| 
       632 
637 
     | 
    
         
             
                }
         
     | 
| 
         @@ -654,7 +659,7 @@ Arena *arena_create(uint32_t capacity) { 
     | 
|
| 
       654 
659 
     | 
    
         
             
                return arena;
         
     | 
| 
       655 
660 
     | 
    
         
             
            }
         
     | 
| 
       656 
661 
     | 
    
         | 
| 
       657 
     | 
    
         
            -
            void *arena_alloc(Arena *arena, uint32_t size) {
     | 
| 
      
 662 
     | 
    
         
            +
            void *arena_alloc(Arena *arena, size_t size) {
         
     | 
| 
       658 
663 
     | 
    
         
             
                size = (size + 7) & ~7;
         
     | 
| 
       659 
664 
     | 
    
         | 
| 
       660 
665 
     | 
    
         
             
                if (arena->offset + size > arena->capacity) {
         
     | 
| 
         @@ -669,7 +674,8 @@ void *arena_alloc(Arena *arena, uint32_t size) { 
     | 
|
| 
       669 
674 
     | 
    
         
             
            }
         
     | 
| 
       670 
675 
     | 
    
         | 
| 
       671 
676 
     | 
    
         
             
            void arena_destroy(Arena *arena) {
         
     | 
| 
       672 
     | 
    
         
            -
                /*  
     | 
| 
      
 677 
     | 
    
         
            +
                /* double leftover = ((double) arena->capacity - arena->offset) / arena->capacity * 100; */
         
     | 
| 
      
 678 
     | 
    
         
            +
                /* printf("[Arena Destroy] Capacity: %zu, offset: %zu, left: %2.2f%%\n", arena->capacity, arena->offset, leftover); */
         
     | 
| 
       673 
679 
     | 
    
         
             
                free(arena->buffer);
         
     | 
| 
       674 
680 
     | 
    
         
             
                free(arena);
         
     | 
| 
       675 
681 
     | 
    
         
             
            }
         
     | 
    
        data/lib/ckmeans/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ckmeans
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0.2
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.0.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Vlad Lebedev
         
     | 
| 
       8 
8 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       9 
9 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       10 
     | 
    
         
            -
            date: 2025- 
     | 
| 
      
 10 
     | 
    
         
            +
            date: 2025-05-01 00:00:00.000000000 Z
         
     | 
| 
       11 
11 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       12 
12 
     | 
    
         
             
            description: Repeatable clustering of unidimensional data
         
     | 
| 
       13 
13 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -17,11 +17,13 @@ extensions: 
     | 
|
| 
       17 
17 
     | 
    
         
             
            - ext/ckmeans/extconf.rb
         
     | 
| 
       18 
18 
     | 
    
         
             
            extra_rdoc_files: []
         
     | 
| 
       19 
19 
     | 
    
         
             
            files:
         
     | 
| 
      
 20 
     | 
    
         
            +
            - ".dockerignore"
         
     | 
| 
       20 
21 
     | 
    
         
             
            - ".rspec"
         
     | 
| 
       21 
22 
     | 
    
         
             
            - ".rubocop.yml"
         
     | 
| 
       22 
23 
     | 
    
         
             
            - ".rubocop_todo.yml"
         
     | 
| 
       23 
24 
     | 
    
         
             
            - ".ruby-version"
         
     | 
| 
       24 
25 
     | 
    
         
             
            - CHANGELOG.md
         
     | 
| 
      
 26 
     | 
    
         
            +
            - Dockerfile
         
     | 
| 
       25 
27 
     | 
    
         
             
            - LICENSE
         
     | 
| 
       26 
28 
     | 
    
         
             
            - README.md
         
     | 
| 
       27 
29 
     | 
    
         
             
            - Rakefile
         
     |