ckmeans 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.dockerignore +13 -0
 - data/.rubocop_todo.yml +3 -39
 - data/CHANGELOG.md +9 -0
 - data/Dockerfile +11 -0
 - data/ext/ckmeans/extensions.c +191 -214
 - data/lib/ckmeans/clusterer.rb +1 -297
 - data/lib/ckmeans/version.rb +1 -1
 - metadata +4 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 5ad7e8c24dd367d5e6a6dd66abc529ae92079cf99d1c781a7646c929547b0e62
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 2e338ca878eba2d250ca61fff2ea8bee44ec8387b37e12b31600edf9da2b7130
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 8c59e1e159cc9cada8afed9e016a5d8956cfe909bb7b7d82c8d155f388fdf1924a49072d37e52065fa643a539da3a192767eddb38da95b2c2524bcc7d0a39ebd
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: f2b535377d441bc1f2ee309a5466c8231b425aa0dd9b0512aa36257defa12b3b645694ae953b2b5e3b6997c50bde796e8fa1c2f8f10d4055b1cc9cb6abcf1353
         
     | 
    
        data/.dockerignore
    ADDED
    
    
    
        data/.rubocop_todo.yml
    CHANGED
    
    | 
         @@ -1,49 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # This configuration was generated by
         
     | 
| 
       2 
2 
     | 
    
         
             
            # `rubocop --auto-gen-config`
         
     | 
| 
       3 
     | 
    
         
            -
            # on 2025-04- 
     | 
| 
      
 3 
     | 
    
         
            +
            # on 2025-04-24 06:16:37 UTC using RuboCop version 1.75.1.
         
     | 
| 
       4 
4 
     | 
    
         
             
            # The point is for the user to remove these configuration records
         
     | 
| 
       5 
5 
     | 
    
         
             
            # one by one as the offenses are removed from the code base.
         
     | 
| 
       6 
6 
     | 
    
         
             
            # Note that changes in the inspected code, or installation of new
         
     | 
| 
       7 
7 
     | 
    
         
             
            # versions of RuboCop, may require this file to be generated again.
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
     | 
    
         
            -
            # Offense count:  
     | 
| 
       10 
     | 
    
         
            -
            # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
         
     | 
| 
       11 
     | 
    
         
            -
            Metrics/AbcSize:
         
     | 
| 
       12 
     | 
    
         
            -
              Max: 95
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
            # Offense count: 2
         
     | 
| 
      
 9 
     | 
    
         
            +
            # Offense count: 1
         
     | 
| 
       15 
10 
     | 
    
         
             
            # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
         
     | 
| 
       16 
11 
     | 
    
         
             
            # AllowedMethods: refine
         
     | 
| 
       17 
12 
     | 
    
         
             
            Metrics/BlockLength:
         
     | 
| 
       18 
     | 
    
         
            -
              Max:  
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
            # Offense count: 3
         
     | 
| 
       21 
     | 
    
         
            -
            # Configuration parameters: AllowedMethods, AllowedPatterns.
         
     | 
| 
       22 
     | 
    
         
            -
            Metrics/CyclomaticComplexity:
         
     | 
| 
       23 
     | 
    
         
            -
              Max: 10
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
            # Offense count: 6
         
     | 
| 
       26 
     | 
    
         
            -
            # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
         
     | 
| 
       27 
     | 
    
         
            -
            Metrics/MethodLength:
         
     | 
| 
       28 
     | 
    
         
            -
              Max: 48
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
            # Offense count: 3
         
     | 
| 
       31 
     | 
    
         
            -
            # Configuration parameters: AllowedMethods, AllowedPatterns.
         
     | 
| 
       32 
     | 
    
         
            -
            Metrics/PerceivedComplexity:
         
     | 
| 
       33 
     | 
    
         
            -
              Max: 13
         
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
            # Offense count: 12
         
     | 
| 
       36 
     | 
    
         
            -
            # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
         
     | 
| 
       37 
     | 
    
         
            -
            # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
         
     | 
| 
       38 
     | 
    
         
            -
            Naming/MethodParameterName:
         
     | 
| 
       39 
     | 
    
         
            -
              Exclude:
         
     | 
| 
       40 
     | 
    
         
            -
                - 'lib/ckmeans/clusterer.rb'
         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
            # Offense count: 5
         
     | 
| 
       43 
     | 
    
         
            -
            # This cop supports unsafe autocorrection (--autocorrect-all).
         
     | 
| 
       44 
     | 
    
         
            -
            # Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
         
     | 
| 
       45 
     | 
    
         
            -
            # SupportedStyles: predicate, comparison
         
     | 
| 
       46 
     | 
    
         
            -
            Style/NumericPredicate:
         
     | 
| 
       47 
     | 
    
         
            -
              Exclude:
         
     | 
| 
       48 
     | 
    
         
            -
                - 'spec/**/*'
         
     | 
| 
       49 
     | 
    
         
            -
                - 'lib/ckmeans/clusterer.rb'
         
     | 
| 
      
 13 
     | 
    
         
            +
              Max: 26
         
     | 
    
        data/CHANGELOG.md
    CHANGED
    
    | 
         @@ -1,5 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ## [Unreleased]
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            ## [1.0.1] - 2025-04-24
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/9
         
     | 
| 
      
 6 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/8
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ## [1.0.0] - 2025-04-22
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            - https://github.com/vlebedeff/rb-ckmeans/pull/6
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
       3 
12 
     | 
    
         
             
            ## [0.1.2] - 2025-03-31
         
     | 
| 
       4 
13 
     | 
    
         | 
| 
       5 
14 
     | 
    
         
             
            - https://github.com/vlebedeff/rb-ckmeans/pull/3
         
     | 
    
        data/Dockerfile
    ADDED
    
    
    
        data/ext/ckmeans/extensions.c
    CHANGED
    
    | 
         @@ -1,18 +1,20 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            #include <stdio.h>
         
     | 
| 
       2 
     | 
    
         
            -
            #include <assert.h>
         
     | 
| 
       3 
2 
     | 
    
         
             
            #include <math.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include <string.h>
         
     | 
| 
       4 
4 
     | 
    
         
             
            #include "ruby.h"
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
      
 6 
     | 
    
         
            +
            typedef long double LDouble;
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
       6 
8 
     | 
    
         
             
            typedef struct Arena {
         
     | 
| 
       7 
     | 
    
         
            -
                 
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
      
 9 
     | 
    
         
            +
                size_t capacity;
         
     | 
| 
      
 10 
     | 
    
         
            +
                size_t offset;
         
     | 
| 
       9 
11 
     | 
    
         
             
                uint8_t  *buffer;
         
     | 
| 
       10 
12 
     | 
    
         
             
            } Arena;
         
     | 
| 
       11 
13 
     | 
    
         | 
| 
       12 
14 
     | 
    
         
             
            typedef struct MatrixF {
         
     | 
| 
       13 
15 
     | 
    
         
             
                uint32_t ncols;
         
     | 
| 
       14 
16 
     | 
    
         
             
                uint32_t nrows;
         
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
      
 17 
     | 
    
         
            +
                LDouble *values;
         
     | 
| 
       16 
18 
     | 
    
         
             
            } MatrixF;
         
     | 
| 
       17 
19 
     | 
    
         | 
| 
       18 
20 
     | 
    
         
             
            typedef struct MatrixI {
         
     | 
| 
         @@ -22,12 +24,12 @@ typedef struct MatrixI { 
     | 
|
| 
       22 
24 
     | 
    
         
             
            } MatrixI;
         
     | 
| 
       23 
25 
     | 
    
         | 
| 
       24 
26 
     | 
    
         
             
            typedef struct VectorF {
         
     | 
| 
       25 
     | 
    
         
            -
                uint32_t  
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
      
 27 
     | 
    
         
            +
                uint32_t size;
         
     | 
| 
      
 28 
     | 
    
         
            +
                LDouble *values;
         
     | 
| 
       27 
29 
     | 
    
         
             
            } VectorF;
         
     | 
| 
       28 
30 
     | 
    
         | 
| 
       29 
31 
     | 
    
         
             
            typedef struct VectorI {
         
     | 
| 
       30 
     | 
    
         
            -
                uint32_t  
     | 
| 
      
 32 
     | 
    
         
            +
                uint32_t size;
         
     | 
| 
       31 
33 
     | 
    
         
             
                uint32_t *values;
         
     | 
| 
       32 
34 
     | 
    
         
             
            } VectorI;
         
     | 
| 
       33 
35 
     | 
    
         | 
| 
         @@ -52,84 +54,82 @@ typedef struct RowParams { 
     | 
|
| 
       52 
54 
     | 
    
         
             
            } RowParams;
         
     | 
| 
       53 
55 
     | 
    
         | 
| 
       54 
56 
     | 
    
         
             
            typedef struct {
         
     | 
| 
       55 
     | 
    
         
            -
                 
     | 
| 
       56 
     | 
    
         
            -
                 
     | 
| 
      
 57 
     | 
    
         
            +
                LDouble mean;
         
     | 
| 
      
 58 
     | 
    
         
            +
                LDouble variance;
         
     | 
| 
       57 
59 
     | 
    
         
             
            } SegmentStats;
         
     | 
| 
       58 
60 
     | 
    
         | 
| 
       59 
     | 
    
         
            -
            VALUE 
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
            Arena 
     | 
| 
       62 
     | 
    
         
            -
            void 
     | 
| 
       63 
     | 
    
         
            -
            void 
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
            void 
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
            VectorI 
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
            void 
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
            long double  dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
      
 61 
     | 
    
         
            +
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            Arena *arena_create(size_t);
         
     | 
| 
      
 64 
     | 
    
         
            +
            void  *arena_alloc(Arena*, size_t);
         
     | 
| 
      
 65 
     | 
    
         
            +
            void  arena_destroy(Arena*);
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            MatrixF  *matrix_create_f(Arena*, uint32_t, uint32_t);
         
     | 
| 
      
 68 
     | 
    
         
            +
            MatrixI  *matrix_create_i(Arena*, uint32_t, uint32_t);
         
     | 
| 
      
 69 
     | 
    
         
            +
            void     matrix_set_f(MatrixF*, uint32_t, uint32_t, LDouble value);
         
     | 
| 
      
 70 
     | 
    
         
            +
            LDouble  matrix_get_f(MatrixF*, uint32_t, uint32_t);
         
     | 
| 
      
 71 
     | 
    
         
            +
            void     matrix_inspect_f(MatrixF*);
         
     | 
| 
      
 72 
     | 
    
         
            +
            void     matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
         
     | 
| 
      
 73 
     | 
    
         
            +
            uint32_t matrix_get_i(MatrixI*, uint32_t, uint32_t);
         
     | 
| 
      
 74 
     | 
    
         
            +
            void     matrix_inspect_i(MatrixI*);
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
            VectorF  *vector_create_f(Arena*, uint32_t);
         
     | 
| 
      
 77 
     | 
    
         
            +
            void     vector_set_f(VectorF*, uint32_t offset, LDouble value);
         
     | 
| 
      
 78 
     | 
    
         
            +
            LDouble  vector_get_f(VectorF*, uint32_t offset);
         
     | 
| 
      
 79 
     | 
    
         
            +
            LDouble  vector_get_diff_f(VectorF*, uint32_t, uint32_t);
         
     | 
| 
      
 80 
     | 
    
         
            +
            void     vector_inspect_f(VectorF*);
         
     | 
| 
      
 81 
     | 
    
         
            +
            VectorI  *vector_create_i(Arena*, uint32_t);
         
     | 
| 
      
 82 
     | 
    
         
            +
            VectorI  *vector_dup_i(VectorI*, Arena*);
         
     | 
| 
      
 83 
     | 
    
         
            +
            void     vector_set_i(VectorI*, uint32_t offset, uint32_t value);
         
     | 
| 
      
 84 
     | 
    
         
            +
            uint32_t vector_get_i(VectorI*, uint32_t offset);
         
     | 
| 
      
 85 
     | 
    
         
            +
            void     vector_downsize_i(VectorI*, uint32_t);
         
     | 
| 
      
 86 
     | 
    
         
            +
            void     vector_inspect_i(VectorI*);
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
            LDouble      dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
         
     | 
| 
       88 
89 
     | 
    
         
             
            void         fill_row(State, uint32_t, uint32_t, uint32_t);
         
     | 
| 
       89 
90 
     | 
    
         
             
            void         smawk(State, RowParams, VectorI*);
         
     | 
| 
       90 
91 
     | 
    
         
             
            void         find_min_from_candidates(State, RowParams, VectorI*);
         
     | 
| 
       91 
     | 
    
         
            -
            VectorI 
     | 
| 
      
 92 
     | 
    
         
            +
            VectorI      *prune_candidates(State, RowParams, VectorI*);
         
     | 
| 
       92 
93 
     | 
    
         
             
            void         fill_even_positions(State, RowParams, VectorI*);
         
     | 
| 
       93 
94 
     | 
    
         
             
            SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
         
     | 
| 
       94 
     | 
    
         
            -
            VectorI 
     | 
| 
      
 95 
     | 
    
         
            +
            VectorI      *backtrack_sizes(State, VectorI*, uint32_t);
         
     | 
| 
       95 
96 
     | 
    
         
             
            uint32_t     find_koptimal(State);
         
     | 
| 
       96 
97 
     | 
    
         | 
| 
       97 
98 
     | 
    
         
             
            void Init_extensions(void) {
         
     | 
| 
       98 
     | 
    
         
            -
                VALUE ckmeans_module 
     | 
| 
      
 99 
     | 
    
         
            +
                VALUE ckmeans_module  = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
         
     | 
| 
       99 
100 
     | 
    
         
             
                VALUE clusterer_class = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
         
     | 
| 
       100 
101 
     | 
    
         | 
| 
       101 
102 
     | 
    
         
             
                rb_define_private_method(clusterer_class, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
         
     | 
| 
       102 
103 
     | 
    
         
             
            }
         
     | 
| 
       103 
104 
     | 
    
         | 
| 
       104 
     | 
    
         
            -
            # define ARENA_MIN_CAPACITY  
     | 
| 
       105 
     | 
    
         
            -
            # define ALLOCATION_FACTOR 20
         
     | 
| 
      
 105 
     | 
    
         
            +
            # define ARENA_MIN_CAPACITY 100
         
     | 
| 
       106 
106 
     | 
    
         
             
            # define PIx2 (M_PI * 2.0)
         
     | 
| 
       107 
107 
     | 
    
         | 
| 
       108 
     | 
    
         
            -
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self) 
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
                 
     | 
| 
       111 
     | 
    
         
            -
                 
     | 
| 
       112 
     | 
    
         
            -
                 
     | 
| 
       113 
     | 
    
         
            -
                 
     | 
| 
       114 
     | 
    
         
            -
                 
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
                 
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
                     
     | 
| 
       122 
     | 
    
         
            -
             
     | 
| 
      
 108 
     | 
    
         
            +
            VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
         
     | 
| 
      
 109 
     | 
    
         
            +
            {
         
     | 
| 
      
 110 
     | 
    
         
            +
                uint32_t xcount      = NUM2UINT(rb_iv_get(self, "@xcount"));
         
     | 
| 
      
 111 
     | 
    
         
            +
                uint32_t kmin        = NUM2UINT(rb_iv_get(self, "@kmin"));
         
     | 
| 
      
 112 
     | 
    
         
            +
                uint32_t kmax        = NUM2UINT(rb_iv_get(self, "@kmax"));
         
     | 
| 
      
 113 
     | 
    
         
            +
                bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
         
     | 
| 
      
 114 
     | 
    
         
            +
                VALUE rb_xsorted     = rb_iv_get(self, "@xsorted");
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
                Arena *arena         =
         
     | 
| 
      
 117 
     | 
    
         
            +
                    arena_create(
         
     | 
| 
      
 118 
     | 
    
         
            +
                        sizeof(LDouble) * xcount * (kmax + 4) +
         
     | 
| 
      
 119 
     | 
    
         
            +
                        sizeof(uint32_t) * xcount * kmax * 5 +
         
     | 
| 
      
 120 
     | 
    
         
            +
                        ARENA_MIN_CAPACITY
         
     | 
| 
      
 121 
     | 
    
         
            +
                    );
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
         
     | 
| 
       123 
124 
     | 
    
         | 
| 
       124 
125 
     | 
    
         
             
                MatrixF *cost    = matrix_create_f(arena, kmax, xcount);
         
     | 
| 
       125 
126 
     | 
    
         
             
                MatrixI *splits  = matrix_create_i(arena, kmax, xcount);
         
     | 
| 
       126 
127 
     | 
    
         
             
                VectorF *xsorted = vector_create_f(arena, xcount);
         
     | 
| 
       127 
     | 
    
         
            -
                /* TODO: pack sums into one vector of pairs */
         
     | 
| 
       128 
128 
     | 
    
         
             
                VectorF *xsum    = vector_create_f(arena, xcount);
         
     | 
| 
       129 
129 
     | 
    
         
             
                VectorF *xsumsq  = vector_create_f(arena, xcount);
         
     | 
| 
       130 
130 
     | 
    
         | 
| 
       131 
131 
     | 
    
         
             
                for (uint32_t i = 0; i < xcount; i++) {
         
     | 
| 
       132 
     | 
    
         
            -
                     
     | 
| 
      
 132 
     | 
    
         
            +
                    LDouble xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
         
     | 
| 
       133 
133 
     | 
    
         
             
                    vector_set_f(xsorted, i, xi);
         
     | 
| 
       134 
134 
     | 
    
         
             
                }
         
     | 
| 
       135 
135 
     | 
    
         | 
| 
         @@ -147,17 +147,17 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       147 
147 
     | 
    
         
             
                };
         
     | 
| 
       148 
148 
     | 
    
         | 
| 
       149 
149 
     | 
    
         | 
| 
       150 
     | 
    
         
            -
                 
     | 
| 
       151 
     | 
    
         
            -
                 
     | 
| 
      
 150 
     | 
    
         
            +
                LDouble shift        = vector_get_f(xsorted, xcount / 2);
         
     | 
| 
      
 151 
     | 
    
         
            +
                LDouble diff_initial = vector_get_f(xsorted, 0) - shift;
         
     | 
| 
       152 
152 
     | 
    
         | 
| 
       153 
153 
     | 
    
         
             
                vector_set_f(xsum, 0, diff_initial);
         
     | 
| 
       154 
154 
     | 
    
         
             
                vector_set_f(xsumsq, 0, diff_initial * diff_initial);
         
     | 
| 
       155 
155 
     | 
    
         | 
| 
       156 
156 
     | 
    
         
             
                for (uint32_t i = 1; i < xcount; i++) {
         
     | 
| 
       157 
     | 
    
         
            -
                     
     | 
| 
       158 
     | 
    
         
            -
                     
     | 
| 
       159 
     | 
    
         
            -
                     
     | 
| 
       160 
     | 
    
         
            -
                     
     | 
| 
      
 157 
     | 
    
         
            +
                    LDouble xi          = vector_get_f(xsorted, i);
         
     | 
| 
      
 158 
     | 
    
         
            +
                    LDouble xsum_prev   = vector_get_f(xsum, i - 1);
         
     | 
| 
      
 159 
     | 
    
         
            +
                    LDouble xsumsq_prev = vector_get_f(xsumsq, i - 1);
         
     | 
| 
      
 160 
     | 
    
         
            +
                    LDouble diff        = xi - shift;
         
     | 
| 
       161 
161 
     | 
    
         | 
| 
       162 
162 
     | 
    
         
             
                    vector_set_f(xsum, i, xsum_prev + diff);
         
     | 
| 
       163 
163 
     | 
    
         
             
                    vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
         
     | 
| 
         @@ -172,7 +172,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       172 
172 
     | 
    
         | 
| 
       173 
173 
     | 
    
         
             
                uint32_t koptimal = find_koptimal(state);
         
     | 
| 
       174 
174 
     | 
    
         | 
| 
       175 
     | 
    
         
            -
                VectorI *sizes =  
     | 
| 
      
 175 
     | 
    
         
            +
                VectorI *sizes = vector_create_i(arena, koptimal);
         
     | 
| 
      
 176 
     | 
    
         
            +
                backtrack_sizes(state, sizes, koptimal);
         
     | 
| 
       176 
177 
     | 
    
         | 
| 
       177 
178 
     | 
    
         
             
                /* printf("XSORTED \t"); vector_inspect_f(xsorted); */
         
     | 
| 
       178 
179 
     | 
    
         
             
                /* printf("K OPTIMAL: %lld\n", koptimal); */
         
     | 
| 
         @@ -180,8 +181,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       180 
181 
     | 
    
         
             
                /* printf("FINAL COST\n"); matrix_inspect_f(cost); */
         
     | 
| 
       181 
182 
     | 
    
         
             
                /* printf("FINAL SPLITS\n"); matrix_inspect_i(splits); */
         
     | 
| 
       182 
183 
     | 
    
         | 
| 
       183 
     | 
    
         
            -
                VALUE response = rb_ary_new2(sizes-> 
     | 
| 
       184 
     | 
    
         
            -
                for (uint32_t i = 0; i < sizes-> 
     | 
| 
      
 184 
     | 
    
         
            +
                VALUE response = rb_ary_new2(sizes->size);
         
     | 
| 
      
 185 
     | 
    
         
            +
                for (uint32_t i = 0; i < sizes->size; i++) {
         
     | 
| 
       185 
186 
     | 
    
         
             
                    VALUE size = LONG2NUM(vector_get_i(sizes, i));
         
     | 
| 
       186 
187 
     | 
    
         
             
                    rb_ary_store(response, i, size);
         
     | 
| 
       187 
188 
     | 
    
         
             
                }
         
     | 
| 
         @@ -193,29 +194,30 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) { 
     | 
|
| 
       193 
194 
     | 
    
         | 
| 
       194 
195 
     | 
    
         
             
            uint32_t find_koptimal(State state)
         
     | 
| 
       195 
196 
     | 
    
         
             
            {
         
     | 
| 
       196 
     | 
    
         
            -
                uint32_t kmin 
     | 
| 
       197 
     | 
    
         
            -
                uint32_t kmax 
     | 
| 
       198 
     | 
    
         
            -
                uint32_t xcount 
     | 
| 
       199 
     | 
    
         
            -
                uint32_t kopt 
     | 
| 
       200 
     | 
    
         
            -
                uint32_t xindex_max 
     | 
| 
       201 
     | 
    
         
            -
                VectorF *xsorted 
     | 
| 
       202 
     | 
    
         
            -
                 
     | 
| 
       203 
     | 
    
         
            -
                 
     | 
| 
       204 
     | 
    
         
            -
                 
     | 
| 
       205 
     | 
    
         
            -
                 
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
      
 197 
     | 
    
         
            +
                uint32_t kmin       = state.kmin;
         
     | 
| 
      
 198 
     | 
    
         
            +
                uint32_t kmax       = state.kmax;
         
     | 
| 
      
 199 
     | 
    
         
            +
                uint32_t xcount     = state.xcount;
         
     | 
| 
      
 200 
     | 
    
         
            +
                uint32_t kopt       = kmin;
         
     | 
| 
      
 201 
     | 
    
         
            +
                uint32_t xindex_max = state.xcount - 1;
         
     | 
| 
      
 202 
     | 
    
         
            +
                VectorF *xsorted    = state.xsorted;
         
     | 
| 
      
 203 
     | 
    
         
            +
                LDouble x0          = vector_get_f(xsorted, 0);
         
     | 
| 
      
 204 
     | 
    
         
            +
                LDouble xn          = vector_get_f(xsorted, xindex_max);
         
     | 
| 
      
 205 
     | 
    
         
            +
                LDouble max_bic     = 0.0;
         
     | 
| 
      
 206 
     | 
    
         
            +
                LDouble xcount_log  = log((LDouble) xcount);
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
                VectorI *sizes = vector_create_i(state.arena, kmax);
         
     | 
| 
       207 
209 
     | 
    
         
             
                for (uint32_t k = kmin; k <= kmax; k++) {
         
     | 
| 
       208 
210 
     | 
    
         
             
                    uint32_t index_right, index_left = 0;
         
     | 
| 
       209 
     | 
    
         
            -
                     
     | 
| 
       210 
     | 
    
         
            -
                     
     | 
| 
      
 211 
     | 
    
         
            +
                    LDouble bin_left, bin_right, loglikelihood = 0.0;
         
     | 
| 
      
 212 
     | 
    
         
            +
                    backtrack_sizes(state, sizes, k);
         
     | 
| 
       211 
213 
     | 
    
         | 
| 
       212 
214 
     | 
    
         
             
                    for (uint32_t kb = 0; kb < k; kb++) {
         
     | 
| 
       213 
     | 
    
         
            -
                        uint32_t npoints 
     | 
| 
       214 
     | 
    
         
            -
                        index_right 
     | 
| 
       215 
     | 
    
         
            -
                         
     | 
| 
       216 
     | 
    
         
            -
                         
     | 
| 
       217 
     | 
    
         
            -
                        bin_left 
     | 
| 
       218 
     | 
    
         
            -
                        bin_right 
     | 
| 
      
 215 
     | 
    
         
            +
                        uint32_t npoints = vector_get_i(sizes, kb);
         
     | 
| 
      
 216 
     | 
    
         
            +
                        index_right      = index_left + npoints - 1;
         
     | 
| 
      
 217 
     | 
    
         
            +
                        LDouble xleft    = vector_get_f(xsorted, index_left);
         
     | 
| 
      
 218 
     | 
    
         
            +
                        LDouble xright   = vector_get_f(xsorted, index_right);
         
     | 
| 
      
 219 
     | 
    
         
            +
                        bin_left         = xleft;
         
     | 
| 
      
 220 
     | 
    
         
            +
                        bin_right        = xright;
         
     | 
| 
       219 
221 
     | 
    
         | 
| 
       220 
222 
     | 
    
         
             
                        if (xleft == xright) {
         
     | 
| 
       221 
223 
     | 
    
         
             
                            bin_left  = index_left == 0
         
     | 
| 
         @@ -226,18 +228,18 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       226 
228 
     | 
    
         
             
                                : xn;
         
     | 
| 
       227 
229 
     | 
    
         
             
                        }
         
     | 
| 
       228 
230 
     | 
    
         | 
| 
       229 
     | 
    
         
            -
                         
     | 
| 
       230 
     | 
    
         
            -
                        SegmentStats stats 
     | 
| 
       231 
     | 
    
         
            -
                         
     | 
| 
       232 
     | 
    
         
            -
                         
     | 
| 
      
 231 
     | 
    
         
            +
                        LDouble bin_width  = bin_right - bin_left;
         
     | 
| 
      
 232 
     | 
    
         
            +
                        SegmentStats stats = shifted_data_variance(xsorted, index_left, index_right);
         
     | 
| 
      
 233 
     | 
    
         
            +
                        LDouble mean       = stats.mean;
         
     | 
| 
      
 234 
     | 
    
         
            +
                        LDouble variance   = stats.variance;
         
     | 
| 
       233 
235 
     | 
    
         | 
| 
       234 
236 
     | 
    
         
             
                        if (variance > 0) {
         
     | 
| 
       235 
237 
     | 
    
         
             
                            for (uint32_t i = index_left; i <= index_right; i++) {
         
     | 
| 
       236 
     | 
    
         
            -
                                 
     | 
| 
      
 238 
     | 
    
         
            +
                                LDouble xi     = vector_get_f(xsorted, i);
         
     | 
| 
       237 
239 
     | 
    
         
             
                                loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
         
     | 
| 
       238 
240 
     | 
    
         
             
                            }
         
     | 
| 
       239 
241 
     | 
    
         
             
                            loglikelihood += npoints * (
         
     | 
| 
       240 
     | 
    
         
            -
                                (log(npoints / ( 
     | 
| 
      
 242 
     | 
    
         
            +
                                (state.apply_deviation ? 0.0 : log(npoints / (LDouble) xcount)) -
         
     | 
| 
       241 
243 
     | 
    
         
             
                                (0.5 * log(PIx2 * variance))
         
     | 
| 
       242 
244 
     | 
    
         
             
                            );
         
     | 
| 
       243 
245 
     | 
    
         
             
                        } else {
         
     | 
| 
         @@ -247,24 +249,23 @@ uint32_t find_koptimal(State state) 
     | 
|
| 
       247 
249 
     | 
    
         
             
                        index_left = index_right + 1;
         
     | 
| 
       248 
250 
     | 
    
         
             
                    }
         
     | 
| 
       249 
251 
     | 
    
         | 
| 
       250 
     | 
    
         
            -
                     
     | 
| 
      
 252 
     | 
    
         
            +
                    LDouble bic = (2.0 * loglikelihood) - (((3 * k) - 1) * xcount_log);
         
     | 
| 
       251 
253 
     | 
    
         | 
| 
       252 
254 
     | 
    
         
             
                    if (k == kmin) {
         
     | 
| 
       253 
255 
     | 
    
         
             
                        max_bic = bic;
         
     | 
| 
       254 
     | 
    
         
            -
                        kopt 
     | 
| 
      
 256 
     | 
    
         
            +
                        kopt    = kmin;
         
     | 
| 
       255 
257 
     | 
    
         
             
                    } else if (bic > max_bic) {
         
     | 
| 
       256 
258 
     | 
    
         
             
                        max_bic = bic;
         
     | 
| 
       257 
     | 
    
         
            -
                        kopt 
     | 
| 
      
 259 
     | 
    
         
            +
                        kopt    = k;
         
     | 
| 
       258 
260 
     | 
    
         
             
                    }
         
     | 
| 
       259 
261 
     | 
    
         
             
                }
         
     | 
| 
       260 
262 
     | 
    
         | 
| 
       261 
263 
     | 
    
         
             
                return kopt;
         
     | 
| 
       262 
264 
     | 
    
         
             
            }
         
     | 
| 
       263 
265 
     | 
    
         | 
| 
       264 
     | 
    
         
            -
            VectorI *backtrack_sizes(State state, uint32_t k)
         
     | 
| 
      
 266 
     | 
    
         
            +
            VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
         
     | 
| 
       265 
267 
     | 
    
         
             
            {
         
     | 
| 
       266 
268 
     | 
    
         
             
                MatrixI *splits = state.splits;
         
     | 
| 
       267 
     | 
    
         
            -
                VectorI *sizes  = vector_create_i(state.arena, k);
         
     | 
| 
       268 
269 
     | 
    
         
             
                uint32_t xcount = state.xcount;
         
     | 
| 
       269 
270 
     | 
    
         
             
                uint32_t right  = xcount - 1;
         
     | 
| 
       270 
271 
     | 
    
         
             
                uint32_t left   = 0;
         
     | 
| 
         @@ -274,6 +275,7 @@ VectorI *backtrack_sizes(State state, uint32_t k) 
     | 
|
| 
       274 
275 
     | 
    
         
             
                    left = matrix_get_i(splits, i, right);
         
     | 
| 
       275 
276 
     | 
    
         
             
                    vector_set_i(sizes, i, right - left + 1);
         
     | 
| 
       276 
277 
     | 
    
         
             
                }
         
     | 
| 
      
 278 
     | 
    
         
            +
                // Special case outside of the loop removing the need for conditionals
         
     | 
| 
       277 
279 
     | 
    
         
             
                left = matrix_get_i(splits, 0, right);
         
     | 
| 
       278 
280 
     | 
    
         
             
                vector_set_i(sizes, 0, right - left + 1);
         
     | 
| 
       279 
281 
     | 
    
         | 
| 
         @@ -282,16 +284,16 @@ VectorI *backtrack_sizes(State state, uint32_t k) 
     | 
|
| 
       282 
284 
     | 
    
         | 
| 
       283 
285 
     | 
    
         
             
            SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t right)
         
     | 
| 
       284 
286 
     | 
    
         
             
            {
         
     | 
| 
       285 
     | 
    
         
            -
                const uint32_t n 
     | 
| 
       286 
     | 
    
         
            -
                 
     | 
| 
       287 
     | 
    
         
            -
                 
     | 
| 
      
 287 
     | 
    
         
            +
                const uint32_t n   = right - left + 1;
         
     | 
| 
      
 288 
     | 
    
         
            +
                LDouble sum        = 0.0;
         
     | 
| 
      
 289 
     | 
    
         
            +
                LDouble sumsq      = 0.0;
         
     | 
| 
       288 
290 
     | 
    
         
             
                SegmentStats stats = { .mean = 0.0, .variance = 0.0 };
         
     | 
| 
       289 
291 
     | 
    
         | 
| 
       290 
292 
     | 
    
         
             
                if (right >= left) {
         
     | 
| 
       291 
     | 
    
         
            -
                    const  
     | 
| 
      
 293 
     | 
    
         
            +
                    const LDouble median = vector_get_f(xsorted, (left + right) / 2);
         
     | 
| 
       292 
294 
     | 
    
         | 
| 
       293 
295 
     | 
    
         
             
                    for (uint32_t i = left; i <= right; i++) {
         
     | 
| 
       294 
     | 
    
         
            -
                        const  
     | 
| 
      
 296 
     | 
    
         
            +
                        const LDouble sumi = vector_get_f(xsorted, i) - median;
         
     | 
| 
       295 
297 
     | 
    
         | 
| 
       296 
298 
     | 
    
         
             
                        sum   += sumi;
         
     | 
| 
       297 
299 
     | 
    
         
             
                        sumsq += sumi * sumi;
         
     | 
| 
         @@ -306,7 +308,8 @@ SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t rig 
     | 
|
| 
       306 
308 
     | 
    
         
             
                return stats;
         
     | 
| 
       307 
309 
     | 
    
         
             
            }
         
     | 
| 
       308 
310 
     | 
    
         | 
| 
       309 
     | 
    
         
            -
            void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax) 
     | 
| 
      
 311 
     | 
    
         
            +
            void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax)
         
     | 
| 
      
 312 
     | 
    
         
            +
            {
         
     | 
| 
       310 
313 
     | 
    
         
             
                uint32_t size = imax - q + 1;
         
     | 
| 
       311 
314 
     | 
    
         
             
                VectorI *split_candidates = vector_create_i(state.arena, size);
         
     | 
| 
       312 
315 
     | 
    
         
             
                for (uint32_t i = 0; i < size; i++) {
         
     | 
| 
         @@ -316,7 +319,8 @@ void fill_row(State state, uint32_t q, uint32_t imin, uint32_t imax) { 
     | 
|
| 
       316 
319 
     | 
    
         
             
                smawk(state, rparams, split_candidates);
         
     | 
| 
       317 
320 
     | 
    
         
             
            }
         
     | 
| 
       318 
321 
     | 
    
         | 
| 
       319 
     | 
    
         
            -
            void smawk(State state, RowParams rparams, VectorI *split_candidates) 
     | 
| 
      
 322 
     | 
    
         
            +
            void smawk(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
      
 323 
     | 
    
         
            +
            {
         
     | 
| 
       320 
324 
     | 
    
         
             
                const uint32_t imin  = rparams.imin;
         
     | 
| 
       321 
325 
     | 
    
         
             
                const uint32_t imax  = rparams.imax;
         
     | 
| 
       322 
326 
     | 
    
         
             
                const uint32_t istep = rparams.istep;
         
     | 
| 
         @@ -326,9 +330,9 @@ void smawk(State state, RowParams rparams, VectorI *split_candidates) { 
     | 
|
| 
       326 
330 
     | 
    
         
             
                } else {
         
     | 
| 
       327 
331 
     | 
    
         
             
                    VectorI *odd_candidates = prune_candidates(state, rparams, split_candidates);
         
     | 
| 
       328 
332 
     | 
    
         
             
                    /* printf("PRUNED\t"); vector_inspect_i(odd_candidates); */
         
     | 
| 
       329 
     | 
    
         
            -
                    uint32_t istepx2 
     | 
| 
       330 
     | 
    
         
            -
                    uint32_t imin_odd 
     | 
| 
       331 
     | 
    
         
            -
                    uint32_t imax_odd 
     | 
| 
      
 333 
     | 
    
         
            +
                    uint32_t istepx2        = istep * 2;
         
     | 
| 
      
 334 
     | 
    
         
            +
                    uint32_t imin_odd       = imin + istep;
         
     | 
| 
      
 335 
     | 
    
         
            +
                    uint32_t imax_odd       = imin_odd + ((imax - imin_odd) / istepx2 * istepx2);
         
     | 
| 
       332 
336 
     | 
    
         
             
                    RowParams rparams_odd   = { .row = rparams.row, .imin = imin_odd, .imax = imax_odd, .istep = istepx2 };
         
     | 
| 
       333 
337 
     | 
    
         | 
| 
       334 
338 
     | 
    
         
             
                    smawk(state, rparams_odd, odd_candidates);
         
     | 
| 
         @@ -342,32 +346,32 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       342 
346 
     | 
    
         
             
                uint32_t imin    = rparams.imin;
         
     | 
| 
       343 
347 
     | 
    
         
             
                uint32_t imax    = rparams.imax;
         
     | 
| 
       344 
348 
     | 
    
         
             
                uint32_t istep   = rparams.istep;
         
     | 
| 
       345 
     | 
    
         
            -
                uint32_t n       = split_candidates-> 
     | 
| 
      
 349 
     | 
    
         
            +
                uint32_t n       = split_candidates->size;
         
     | 
| 
       346 
350 
     | 
    
         
             
                uint32_t istepx2 = istep * 2;
         
     | 
| 
       347 
351 
     | 
    
         
             
                uint32_t jl      = vector_get_i(split_candidates, 0);
         
     | 
| 
       348 
     | 
    
         
            -
                VectorF *xsum 
     | 
| 
       349 
     | 
    
         
            -
                VectorF *xsumsq 
     | 
| 
       350 
     | 
    
         
            -
                MatrixI *splits 
     | 
| 
      
 352 
     | 
    
         
            +
                VectorF *xsum    = state.xsum;
         
     | 
| 
      
 353 
     | 
    
         
            +
                VectorF *xsumsq  = state.xsumsq;
         
     | 
| 
      
 354 
     | 
    
         
            +
                MatrixI *splits  = state.splits;
         
     | 
| 
       351 
355 
     | 
    
         | 
| 
       352 
356 
     | 
    
         
             
                for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
         
     | 
| 
       353 
357 
     | 
    
         
             
                    while (vector_get_i(split_candidates, r) < jl) r++;
         
     | 
| 
       354 
358 
     | 
    
         | 
| 
       355 
     | 
    
         
            -
                    uint32_t rcandidate 
     | 
| 
      
 359 
     | 
    
         
            +
                    uint32_t rcandidate    = vector_get_i(split_candidates, r);
         
     | 
| 
       356 
360 
     | 
    
         
             
                    uint32_t cost_base_row = row - 1;
         
     | 
| 
       357 
361 
     | 
    
         
             
                    uint32_t cost_base_col = rcandidate - 1;
         
     | 
| 
       358 
     | 
    
         
            -
                     
     | 
| 
      
 362 
     | 
    
         
            +
                    LDouble cost           =
         
     | 
| 
       359 
363 
     | 
    
         
             
                        matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissimilarity(rcandidate, i, xsum, xsumsq);
         
     | 
| 
       360 
364 
     | 
    
         | 
| 
       361 
365 
     | 
    
         
             
                    matrix_set_f(state.cost, row, i, cost);
         
     | 
| 
       362 
366 
     | 
    
         
             
                    matrix_set_i(state.splits, row, i, rcandidate);
         
     | 
| 
       363 
367 
     | 
    
         | 
| 
       364 
     | 
    
         
            -
                    uint32_t jh 
     | 
| 
      
 368 
     | 
    
         
            +
                    uint32_t jh =
         
     | 
| 
       365 
369 
     | 
    
         
             
                        (i + istep) <= imax
         
     | 
| 
       366 
370 
     | 
    
         
             
                        ? matrix_get_i(splits, row, i + istep)
         
     | 
| 
       367 
371 
     | 
    
         
             
                        : vector_get_i(split_candidates, n - 1);
         
     | 
| 
       368 
372 
     | 
    
         | 
| 
       369 
     | 
    
         
            -
                    uint32_t jmax 
     | 
| 
       370 
     | 
    
         
            -
                     
     | 
| 
      
 373 
     | 
    
         
            +
                    uint32_t jmax  = jh < i ? jh : i;
         
     | 
| 
      
 374 
     | 
    
         
            +
                    LDouble sjimin = dissimilarity(jmax, i, xsum, xsumsq);
         
     | 
| 
       371 
375 
     | 
    
         | 
| 
       372 
376 
     | 
    
         
             
                    for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
         
     | 
| 
       373 
377 
     | 
    
         
             
                        uint32_t jabs = vector_get_i(split_candidates, r);
         
     | 
| 
         @@ -375,9 +379,9 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       375 
379 
     | 
    
         
             
                        if (jabs > i) break;
         
     | 
| 
       376 
380 
     | 
    
         
             
                        if (jabs < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       377 
381 
     | 
    
         | 
| 
       378 
     | 
    
         
            -
                         
     | 
| 
       379 
     | 
    
         
            -
                         
     | 
| 
       380 
     | 
    
         
            -
                         
     | 
| 
      
 382 
     | 
    
         
            +
                        LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs  - 1);
         
     | 
| 
      
 383 
     | 
    
         
            +
                        LDouble sj        = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
         
     | 
| 
      
 384 
     | 
    
         
            +
                        LDouble cost_prev = matrix_get_f(state.cost, row, i);
         
     | 
| 
       381 
385 
     | 
    
         | 
| 
       382 
386 
     | 
    
         
             
                        if (sj <= cost_prev) {
         
     | 
| 
       383 
387 
     | 
    
         
             
                            matrix_set_f(state.cost, row, i, sj);
         
     | 
| 
         @@ -394,10 +398,10 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat 
     | 
|
| 
       394 
398 
     | 
    
         | 
| 
       395 
399 
     | 
    
         
             
            void find_min_from_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       396 
400 
     | 
    
         
             
            {
         
     | 
| 
       397 
     | 
    
         
            -
                const uint32_t row 
     | 
| 
       398 
     | 
    
         
            -
                const uint32_t imin 
     | 
| 
       399 
     | 
    
         
            -
                const uint32_t imax 
     | 
| 
       400 
     | 
    
         
            -
                const uint32_t istep 
     | 
| 
      
 401 
     | 
    
         
            +
                const uint32_t row    = rparams.row;
         
     | 
| 
      
 402 
     | 
    
         
            +
                const uint32_t imin   = rparams.imin;
         
     | 
| 
      
 403 
     | 
    
         
            +
                const uint32_t imax   = rparams.imax;
         
     | 
| 
      
 404 
     | 
    
         
            +
                const uint32_t istep  = rparams.istep;
         
     | 
| 
       401 
405 
     | 
    
         
             
                MatrixF *const cost   = state.cost;
         
     | 
| 
       402 
406 
     | 
    
         
             
                MatrixI *const splits = state.splits;
         
     | 
| 
       403 
407 
     | 
    
         | 
| 
         @@ -408,19 +412,19 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       408 
412 
     | 
    
         
             
                    const uint32_t optimal_split_idx = optimal_split_idx_prev;
         
     | 
| 
       409 
413 
     | 
    
         
             
                    const uint32_t optimal_split     = vector_get_i(split_candidates, optimal_split_idx);
         
     | 
| 
       410 
414 
     | 
    
         
             
                    const uint32_t cost_prev         = matrix_get_f(cost, row - 1, optimal_split - 1);
         
     | 
| 
       411 
     | 
    
         
            -
                    const  
     | 
| 
      
 415 
     | 
    
         
            +
                    const LDouble added_cost         = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
         
     | 
| 
       412 
416 
     | 
    
         | 
| 
       413 
417 
     | 
    
         
             
                    matrix_set_f(cost, row, i, cost_prev + added_cost);
         
     | 
| 
       414 
418 
     | 
    
         
             
                    matrix_set_i(splits, row, i, optimal_split);
         
     | 
| 
       415 
419 
     | 
    
         | 
| 
       416 
     | 
    
         
            -
                    for (uint32_t r = optimal_split_idx + 1; r < split_candidates-> 
     | 
| 
      
 420 
     | 
    
         
            +
                    for (uint32_t r = optimal_split_idx + 1; r < split_candidates->size; r++)
         
     | 
| 
       417 
421 
     | 
    
         
             
                    {
         
     | 
| 
       418 
422 
     | 
    
         
             
                        uint32_t split = vector_get_i(split_candidates, r);
         
     | 
| 
       419 
423 
     | 
    
         | 
| 
       420 
424 
     | 
    
         
             
                        if (split < matrix_get_i(splits, row - 1, i)) continue;
         
     | 
| 
       421 
425 
     | 
    
         
             
                        if (split > i) break;
         
     | 
| 
       422 
426 
     | 
    
         | 
| 
       423 
     | 
    
         
            -
                         
     | 
| 
      
 427 
     | 
    
         
            +
                        LDouble split_cost =
         
     | 
| 
       424 
428 
     | 
    
         
             
                            matrix_get_f(cost, row - 1, split - 1) + dissimilarity(split, i, state.xsum, state.xsumsq);
         
     | 
| 
       425 
429 
     | 
    
         | 
| 
       426 
430 
     | 
    
         
             
                        if (split_cost > matrix_get_f(cost, row, i)) continue;
         
     | 
| 
         @@ -434,39 +438,39 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can 
     | 
|
| 
       434 
438 
     | 
    
         | 
| 
       435 
439 
     | 
    
         
             
            VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candidates)
         
     | 
| 
       436 
440 
     | 
    
         
             
            {
         
     | 
| 
       437 
     | 
    
         
            -
                uint32_t  
     | 
| 
       438 
     | 
    
         
            -
                uint32_t  
     | 
| 
      
 441 
     | 
    
         
            +
                uint32_t imin  = rparams.imin;
         
     | 
| 
      
 442 
     | 
    
         
            +
                uint32_t row   = rparams.row;
         
     | 
| 
      
 443 
     | 
    
         
            +
                uint32_t istep = rparams.istep;
         
     | 
| 
      
 444 
     | 
    
         
            +
                uint32_t n     = ((rparams.imax - imin) / istep) + 1;
         
     | 
| 
      
 445 
     | 
    
         
            +
                uint32_t m     = split_candidates->size;
         
     | 
| 
       439 
446 
     | 
    
         | 
| 
       440 
447 
     | 
    
         
             
                if (n >= m) return split_candidates;
         
     | 
| 
       441 
448 
     | 
    
         | 
| 
       442 
     | 
    
         
            -
                 
     | 
| 
      
 449 
     | 
    
         
            +
                uint32_t left   = 0;
         
     | 
| 
       443 
450 
     | 
    
         
             
                uint32_t right  = 0;
         
     | 
| 
       444 
451 
     | 
    
         
             
                VectorI *pruned = vector_dup_i(split_candidates, state.arena);
         
     | 
| 
       445 
452 
     | 
    
         | 
| 
       446 
453 
     | 
    
         
             
                while (m > n)
         
     | 
| 
       447 
454 
     | 
    
         
             
                {
         
     | 
| 
       448 
     | 
    
         
            -
                    uint32_t  
     | 
| 
       449 
     | 
    
         
            -
                    uint32_t  
     | 
| 
       450 
     | 
    
         
            -
                    uint32_t  
     | 
| 
       451 
     | 
    
         
            -
                     
     | 
| 
       452 
     | 
    
         
            -
             
     | 
| 
       453 
     | 
    
         
            -
             
     | 
| 
       454 
     | 
    
         
            -
             
     | 
| 
       455 
     | 
    
         
            -
             
     | 
| 
       456 
     | 
    
         
            -
             
     | 
| 
       457 
     | 
    
         
            -
             
     | 
| 
      
 455 
     | 
    
         
            +
                    uint32_t i     = imin + left * istep;
         
     | 
| 
      
 456 
     | 
    
         
            +
                    uint32_t j     = vector_get_i(pruned, right);
         
     | 
| 
      
 457 
     | 
    
         
            +
                    uint32_t jnext = vector_get_i(pruned, right + 1);
         
     | 
| 
      
 458 
     | 
    
         
            +
                    LDouble sl     =
         
     | 
| 
      
 459 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
         
     | 
| 
      
 460 
     | 
    
         
            +
                    LDouble snext  =
         
     | 
| 
      
 461 
     | 
    
         
            +
                        matrix_get_f(state.cost, row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
         
     | 
| 
      
 462 
     | 
    
         
            +
             
     | 
| 
      
 463 
     | 
    
         
            +
                    if ((sl < snext) && (left < n - 1)) {
         
     | 
| 
      
 464 
     | 
    
         
            +
                        vector_set_i(pruned, left, j);
         
     | 
| 
       458 
465 
     | 
    
         
             
                        left++;
         
     | 
| 
       459 
466 
     | 
    
         
             
                        right++;
         
     | 
| 
       460 
     | 
    
         
            -
             
     | 
| 
       461 
     | 
    
         
            -
                    } else if ((sl < snext) && (p == n - 1)) {
         
     | 
| 
      
 467 
     | 
    
         
            +
                    } else if ((sl < snext) && (left == n - 1)) {
         
     | 
| 
       462 
468 
     | 
    
         
             
                        right++;
         
     | 
| 
       463 
469 
     | 
    
         
             
                        m--;
         
     | 
| 
       464 
470 
     | 
    
         
             
                        vector_set_i(pruned, right, j);
         
     | 
| 
       465 
471 
     | 
    
         
             
                    } else {
         
     | 
| 
       466 
     | 
    
         
            -
                        if ( 
     | 
| 
       467 
     | 
    
         
            -
                             
     | 
| 
       468 
     | 
    
         
            -
                            vector_set_i(pruned, right, vector_get_i(pruned, left));
         
     | 
| 
       469 
     | 
    
         
            -
                            left--;
         
     | 
| 
      
 472 
     | 
    
         
            +
                        if (left > 0) {
         
     | 
| 
      
 473 
     | 
    
         
            +
                            vector_set_i(pruned, right, vector_get_i(pruned, --left));
         
     | 
| 
       470 
474 
     | 
    
         
             
                        } else {
         
     | 
| 
       471 
475 
     | 
    
         
             
                            right++;
         
     | 
| 
       472 
476 
     | 
    
         
             
                        }
         
     | 
| 
         @@ -475,8 +479,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       475 
479 
     | 
    
         
             
                    }
         
     | 
| 
       476 
480 
     | 
    
         
             
                }
         
     | 
| 
       477 
481 
     | 
    
         | 
| 
       478 
     | 
    
         
            -
                for (uint32_t i = left 
     | 
| 
       479 
     | 
    
         
            -
                    /* TODO: extract `vector_setcpy_T` */
         
     | 
| 
      
 482 
     | 
    
         
            +
                for (uint32_t i = left; i < m; i++) {
         
     | 
| 
       480 
483 
     | 
    
         
             
                    vector_set_i(pruned, i, vector_get_i(pruned, right++));
         
     | 
| 
       481 
484 
     | 
    
         
             
                }
         
     | 
| 
       482 
485 
     | 
    
         | 
| 
         @@ -485,101 +488,86 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida 
     | 
|
| 
       485 
488 
     | 
    
         
             
                return pruned;
         
     | 
| 
       486 
489 
     | 
    
         
             
            }
         
     | 
| 
       487 
490 
     | 
    
         | 
| 
       488 
     | 
    
         
            -
             
     | 
| 
       489 
     | 
    
         
            -
                 
     | 
| 
      
 491 
     | 
    
         
            +
            inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
         
     | 
| 
      
 492 
     | 
    
         
            +
                LDouble sji = 0.0;
         
     | 
| 
       490 
493 
     | 
    
         | 
| 
       491 
494 
     | 
    
         
             
                if (j >= i) return sji;
         
     | 
| 
       492 
495 
     | 
    
         | 
| 
       493 
496 
     | 
    
         
             
                if (j > 0) {
         
     | 
| 
       494 
     | 
    
         
            -
                     
     | 
| 
       495 
     | 
    
         
            -
                     
     | 
| 
       496 
     | 
    
         
            -
                     
     | 
| 
       497 
     | 
    
         
            -
                    sji                     = vector_get_diff_f(xsumsq, i, j - 1) - (segment_sum * segment_sum / segment_size);
         
     | 
| 
      
 497 
     | 
    
         
            +
                    LDouble segment_diff  = vector_get_diff_f(xsum, i, j - 1);
         
     | 
| 
      
 498 
     | 
    
         
            +
                    uint32_t segment_size = i - j + 1;
         
     | 
| 
      
 499 
     | 
    
         
            +
                    sji                   = vector_get_diff_f(xsumsq, i, j - 1) - (segment_diff * segment_diff / segment_size);
         
     | 
| 
       498 
500 
     | 
    
         
             
                } else {
         
     | 
| 
       499 
     | 
    
         
            -
                     
     | 
| 
       500 
     | 
    
         
            -
                    sji 
     | 
| 
      
 501 
     | 
    
         
            +
                    LDouble xsumi = vector_get_f(xsum, i);
         
     | 
| 
      
 502 
     | 
    
         
            +
                    sji           = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
         
     | 
| 
       501 
503 
     | 
    
         
             
                }
         
     | 
| 
       502 
504 
     | 
    
         | 
| 
       503 
505 
     | 
    
         
             
                return (sji > 0) ? sji : 0.0;
         
     | 
| 
       504 
506 
     | 
    
         
             
            }
         
     | 
| 
       505 
507 
     | 
    
         | 
| 
       506 
     | 
    
         
            -
            VectorF *vector_create_f(Arena *arena, uint32_t  
     | 
| 
      
 508 
     | 
    
         
            +
            inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
         
     | 
| 
       507 
509 
     | 
    
         
             
                VectorF *v;
         
     | 
| 
       508 
510 
     | 
    
         | 
| 
       509 
     | 
    
         
            -
                v 
     | 
| 
       510 
     | 
    
         
            -
                v->values 
     | 
| 
       511 
     | 
    
         
            -
                v-> 
     | 
| 
      
 511 
     | 
    
         
            +
                v         = arena_alloc(arena, sizeof(*v));
         
     | 
| 
      
 512 
     | 
    
         
            +
                v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
         
     | 
| 
      
 513 
     | 
    
         
            +
                v->size   = size;
         
     | 
| 
       512 
514 
     | 
    
         | 
| 
       513 
515 
     | 
    
         
             
                return v;
         
     | 
| 
       514 
516 
     | 
    
         
             
            }
         
     | 
| 
       515 
517 
     | 
    
         | 
| 
       516 
     | 
    
         
            -
            VectorI *vector_create_i(Arena *arena, uint32_t  
     | 
| 
      
 518 
     | 
    
         
            +
            inline VectorI *vector_create_i(Arena *arena, uint32_t size) {
         
     | 
| 
       517 
519 
     | 
    
         
             
                VectorI *v;
         
     | 
| 
       518 
520 
     | 
    
         | 
| 
       519 
     | 
    
         
            -
                v 
     | 
| 
       520 
     | 
    
         
            -
                v->values 
     | 
| 
       521 
     | 
    
         
            -
                v-> 
     | 
| 
      
 521 
     | 
    
         
            +
                v         = arena_alloc(arena, sizeof(*v));
         
     | 
| 
      
 522 
     | 
    
         
            +
                v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
         
     | 
| 
      
 523 
     | 
    
         
            +
                v->size   = size;
         
     | 
| 
       522 
524 
     | 
    
         | 
| 
       523 
525 
     | 
    
         
             
                return v;
         
     | 
| 
       524 
526 
     | 
    
         
             
            }
         
     | 
| 
       525 
527 
     | 
    
         | 
| 
       526 
     | 
    
         
            -
            VectorI *vector_dup_i(VectorI *v, Arena *arena)
         
     | 
| 
      
 528 
     | 
    
         
            +
            inline VectorI *vector_dup_i(VectorI *v, Arena *arena)
         
     | 
| 
       527 
529 
     | 
    
         
             
            {
         
     | 
| 
       528 
     | 
    
         
            -
                VectorI *vdup = vector_create_i(arena, v-> 
     | 
| 
      
 530 
     | 
    
         
            +
                VectorI *vdup = vector_create_i(arena, v->size);
         
     | 
| 
       529 
531 
     | 
    
         | 
| 
       530 
     | 
    
         
            -
                 
     | 
| 
       531 
     | 
    
         
            -
                for (uint32_t i = 0; i < v->nvalues; i++) {
         
     | 
| 
       532 
     | 
    
         
            -
                    vector_set_i(vdup, i, vector_get_i(v, i));
         
     | 
| 
       533 
     | 
    
         
            -
                }
         
     | 
| 
      
 532 
     | 
    
         
            +
                memcpy(vdup->values, v->values, sizeof(*(v->values)) * v->size);
         
     | 
| 
       534 
533 
     | 
    
         | 
| 
       535 
534 
     | 
    
         
             
                return vdup;
         
     | 
| 
       536 
535 
     | 
    
         
             
            }
         
     | 
| 
       537 
536 
     | 
    
         | 
| 
       538 
     | 
    
         
            -
            void vector_set_f(VectorF *v, uint32_t offset,  
     | 
| 
       539 
     | 
    
         
            -
                assert(offset < v->nvalues && "[vector_set_f] element index should be less than nvalues");
         
     | 
| 
       540 
     | 
    
         
            -
             
     | 
| 
      
 537 
     | 
    
         
            +
            inline void vector_set_f(VectorF *v, uint32_t offset, LDouble value) {
         
     | 
| 
       541 
538 
     | 
    
         
             
                *(v->values + offset) = value;
         
     | 
| 
       542 
539 
     | 
    
         
             
            }
         
     | 
| 
       543 
540 
     | 
    
         | 
| 
       544 
     | 
    
         
            -
            void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
         
     | 
| 
       545 
     | 
    
         
            -
                assert(offset < v->nvalues && "[vector_set_i] element index should be less than nvalues");
         
     | 
| 
       546 
     | 
    
         
            -
             
     | 
| 
      
 541 
     | 
    
         
            +
            inline void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
         
     | 
| 
       547 
542 
     | 
    
         
             
                *(v->values + offset) = value;
         
     | 
| 
       548 
543 
     | 
    
         
             
            }
         
     | 
| 
       549 
544 
     | 
    
         | 
| 
       550 
     | 
    
         
            -
            uint32_t vector_get_i(VectorI *v, uint32_t offset) {
         
     | 
| 
       551 
     | 
    
         
            -
                assert(offset < v->nvalues && "[vector_get_i] element index should be less than nvalues");
         
     | 
| 
       552 
     | 
    
         
            -
             
     | 
| 
      
 545 
     | 
    
         
            +
            inline uint32_t vector_get_i(VectorI *v, uint32_t offset) {
         
     | 
| 
       553 
546 
     | 
    
         
             
                return *(v->values + offset);
         
     | 
| 
       554 
547 
     | 
    
         
             
            }
         
     | 
| 
       555 
548 
     | 
    
         | 
| 
       556 
     | 
    
         
            -
            void vector_downsize_i(VectorI *v, uint32_t new_size) {
         
     | 
| 
       557 
     | 
    
         
            -
                v-> 
     | 
| 
      
 549 
     | 
    
         
            +
            inline void vector_downsize_i(VectorI *v, uint32_t new_size) {
         
     | 
| 
      
 550 
     | 
    
         
            +
                v->size = new_size;
         
     | 
| 
       558 
551 
     | 
    
         
             
            }
         
     | 
| 
       559 
552 
     | 
    
         | 
| 
       560 
553 
     | 
    
         
             
            void vector_inspect_i(VectorI *v) {
         
     | 
| 
       561 
     | 
    
         
            -
                for (uint32_t i = 0; i < v-> 
     | 
| 
      
 554 
     | 
    
         
            +
                for (uint32_t i = 0; i < v->size - 1; i++)
         
     | 
| 
       562 
555 
     | 
    
         
             
                    printf("%u, ", vector_get_i(v, i));
         
     | 
| 
       563 
     | 
    
         
            -
                printf("%u\n", vector_get_i(v, v-> 
     | 
| 
      
 556 
     | 
    
         
            +
                printf("%u\n", vector_get_i(v, v->size - 1));
         
     | 
| 
       564 
557 
     | 
    
         
             
            }
         
     | 
| 
       565 
558 
     | 
    
         | 
| 
       566 
     | 
    
         
            -
             
     | 
| 
       567 
     | 
    
         
            -
                assert(offset < v->nvalues && "[vector_get_f] element index should be less than nvalues");
         
     | 
| 
       568 
     | 
    
         
            -
             
     | 
| 
      
 559 
     | 
    
         
            +
            inline LDouble vector_get_f(VectorF *v, uint32_t offset) {
         
     | 
| 
       569 
560 
     | 
    
         
             
                return *(v->values + offset);
         
     | 
| 
       570 
561 
     | 
    
         
             
            }
         
     | 
| 
       571 
562 
     | 
    
         | 
| 
       572 
     | 
    
         
            -
             
     | 
| 
       573 
     | 
    
         
            -
                assert(i < v->nvalues && "[vector_get_diff_f] i should be less than nvalues");
         
     | 
| 
       574 
     | 
    
         
            -
                assert(j < v->nvalues && "[vector_get_diff_f] j should be less than nvalues");
         
     | 
| 
       575 
     | 
    
         
            -
             
     | 
| 
      
 563 
     | 
    
         
            +
            inline LDouble vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
         
     | 
| 
       576 
564 
     | 
    
         
             
                return *(v->values + i) - *(v->values + j);
         
     | 
| 
       577 
565 
     | 
    
         
             
            }
         
     | 
| 
       578 
566 
     | 
    
         | 
| 
       579 
567 
     | 
    
         
             
            void vector_inspect_f(VectorF *v) {
         
     | 
| 
       580 
     | 
    
         
            -
                for (uint32_t i = 0; i < v-> 
     | 
| 
      
 568 
     | 
    
         
            +
                for (uint32_t i = 0; i < v->size - 1; i++)
         
     | 
| 
       581 
569 
     | 
    
         
             
                    printf("%Lf, ", vector_get_f(v, i));
         
     | 
| 
       582 
     | 
    
         
            -
                printf("%Lf\n", vector_get_f(v, v-> 
     | 
| 
      
 570 
     | 
    
         
            +
                printf("%Lf\n", vector_get_f(v, v->size - 1));
         
     | 
| 
       583 
571 
     | 
    
         
             
            }
         
     | 
| 
       584 
572 
     | 
    
         | 
| 
       585 
573 
     | 
    
         
             
            MatrixF *matrix_create_f(Arena *arena, uint32_t nrows, uint32_t ncols) {
         
     | 
| 
         @@ -604,18 +592,12 @@ MatrixI *matrix_create_i(Arena *arena, uint32_t nrows, uint32_t ncols) { 
     | 
|
| 
       604 
592 
     | 
    
         
             
                return m;
         
     | 
| 
       605 
593 
     | 
    
         
             
            }
         
     | 
| 
       606 
594 
     | 
    
         | 
| 
       607 
     | 
    
         
            -
            void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j,  
     | 
| 
       608 
     | 
    
         
            -
                assert(i < m->nrows && "[matrix_set_f] row offset should be less than nrows");
         
     | 
| 
       609 
     | 
    
         
            -
                assert(j < m->cols &&  "[matrix_set_f] col offset should be less than ncols");
         
     | 
| 
       610 
     | 
    
         
            -
             
     | 
| 
      
 595 
     | 
    
         
            +
            inline void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, LDouble value) {
         
     | 
| 
       611 
596 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       612 
597 
     | 
    
         
             
                *(m->values + offset) = value;
         
     | 
| 
       613 
598 
     | 
    
         
             
            }
         
     | 
| 
       614 
599 
     | 
    
         | 
| 
       615 
     | 
    
         
            -
             
     | 
| 
       616 
     | 
    
         
            -
                assert(i < m->nrows && "[matrix_get_f] row offset should be less than nrows");
         
     | 
| 
       617 
     | 
    
         
            -
                assert(j < m->cols &&  "[matrix_get_f] col offset should be less than ncols");
         
     | 
| 
       618 
     | 
    
         
            -
             
     | 
| 
      
 600 
     | 
    
         
            +
            inline LDouble matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
         
     | 
| 
       619 
601 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       620 
602 
     | 
    
         
             
                return *(m->values + offset);
         
     | 
| 
       621 
603 
     | 
    
         
             
            }
         
     | 
| 
         @@ -623,7 +605,7 @@ long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) { 
     | 
|
| 
       623 
605 
     | 
    
         
             
            void matrix_inspect_f(MatrixF *m) {
         
     | 
| 
       624 
606 
     | 
    
         
             
                for (uint32_t i = 0; i < m->nrows; i++) {
         
     | 
| 
       625 
607 
     | 
    
         
             
                    for (uint32_t j = 0; j < m->ncols - 1; j++) {
         
     | 
| 
       626 
     | 
    
         
            -
                         
     | 
| 
      
 608 
     | 
    
         
            +
                        LDouble value = matrix_get_f(m, i, j);
         
     | 
| 
       627 
609 
     | 
    
         | 
| 
       628 
610 
     | 
    
         
             
                        printf("%Lf, ", value);
         
     | 
| 
       629 
611 
     | 
    
         
             
                    }
         
     | 
| 
         @@ -639,23 +621,17 @@ void matrix_inspect_i(MatrixI *m) { 
     | 
|
| 
       639 
621 
     | 
    
         
             
                }
         
     | 
| 
       640 
622 
     | 
    
         
             
            }
         
     | 
| 
       641 
623 
     | 
    
         | 
| 
       642 
     | 
    
         
            -
            void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
         
     | 
| 
       643 
     | 
    
         
            -
                assert(i < m->nrows && "[matrix_set_i] row offset should be less than nrows");
         
     | 
| 
       644 
     | 
    
         
            -
                assert(j < m->cols &&  "[matrix_set_i] col offset should be less than ncols");
         
     | 
| 
       645 
     | 
    
         
            -
             
     | 
| 
      
 624 
     | 
    
         
            +
            inline void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
         
     | 
| 
       646 
625 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       647 
626 
     | 
    
         
             
                *(m->values + offset) = value;
         
     | 
| 
       648 
627 
     | 
    
         
             
            }
         
     | 
| 
       649 
628 
     | 
    
         | 
| 
       650 
     | 
    
         
            -
            uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
         
     | 
| 
       651 
     | 
    
         
            -
                assert(i < m->nrows && "[matrix_get_i] row offset should be less than nrows");
         
     | 
| 
       652 
     | 
    
         
            -
                assert(j < m->cols &&  "[matrix_get_i] col offset should be less than ncols");
         
     | 
| 
       653 
     | 
    
         
            -
             
     | 
| 
      
 629 
     | 
    
         
            +
            inline uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
         
     | 
| 
       654 
630 
     | 
    
         
             
                uint32_t offset = i * m->ncols + j;
         
     | 
| 
       655 
631 
     | 
    
         
             
                return *(m->values + offset);
         
     | 
| 
       656 
632 
     | 
    
         
             
            }
         
     | 
| 
       657 
633 
     | 
    
         | 
| 
       658 
     | 
    
         
            -
            Arena *arena_create( 
     | 
| 
      
 634 
     | 
    
         
            +
            Arena *arena_create(size_t capacity) {
         
     | 
| 
       659 
635 
     | 
    
         
             
                if (capacity < ARENA_MIN_CAPACITY) {
         
     | 
| 
       660 
636 
     | 
    
         
             
                    capacity = ARENA_MIN_CAPACITY;
         
     | 
| 
       661 
637 
     | 
    
         
             
                }
         
     | 
| 
         @@ -683,11 +659,11 @@ Arena *arena_create(uint32_t capacity) { 
     | 
|
| 
       683 
659 
     | 
    
         
             
                return arena;
         
     | 
| 
       684 
660 
     | 
    
         
             
            }
         
     | 
| 
       685 
661 
     | 
    
         | 
| 
       686 
     | 
    
         
            -
            void *arena_alloc(Arena *arena,  
     | 
| 
      
 662 
     | 
    
         
            +
            void *arena_alloc(Arena *arena, size_t size) {
         
     | 
| 
       687 
663 
     | 
    
         
             
                size = (size + 7) & ~7;
         
     | 
| 
       688 
664 
     | 
    
         | 
| 
       689 
665 
     | 
    
         
             
                if (arena->offset + size > arena->capacity) {
         
     | 
| 
       690 
     | 
    
         
            -
                     
     | 
| 
      
 666 
     | 
    
         
            +
                    rb_raise(rb_eNoMemError, "Arena Insufficient Capacity");
         
     | 
| 
       691 
667 
     | 
    
         
             
                    return NULL;
         
     | 
| 
       692 
668 
     | 
    
         
             
                }
         
     | 
| 
       693 
669 
     | 
    
         | 
| 
         @@ -698,7 +674,8 @@ void *arena_alloc(Arena *arena, uint32_t size) { 
     | 
|
| 
       698 
674 
     | 
    
         
             
            }
         
     | 
| 
       699 
675 
     | 
    
         | 
| 
       700 
676 
     | 
    
         
             
            void arena_destroy(Arena *arena) {
         
     | 
| 
       701 
     | 
    
         
            -
                /*  
     | 
| 
      
 677 
     | 
    
         
            +
                /* double leftover = ((double) arena->capacity - arena->offset) / arena->capacity * 100; */
         
     | 
| 
      
 678 
     | 
    
         
            +
                /* printf("[Arena Destroy] Capacity: %zu, offset: %zu, left: %2.2f%%\n", arena->capacity, arena->offset, leftover); */
         
     | 
| 
       702 
679 
     | 
    
         
             
                free(arena->buffer);
         
     | 
| 
       703 
680 
     | 
    
         
             
                free(arena);
         
     | 
| 
       704 
681 
     | 
    
         
             
            }
         
     | 
    
        data/lib/ckmeans/clusterer.rb
    CHANGED
    
    | 
         @@ -1,9 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            module Ckmeans
         
     | 
| 
       4 
     | 
    
         
            -
              class Clusterer # rubocop:disable Style/Documentation 
     | 
| 
       5 
     | 
    
         
            -
                PI_DOUBLE = Math::PI * 2
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
      
 4 
     | 
    
         
            +
              class Clusterer # rubocop:disable Style/Documentation
         
     | 
| 
       7 
5 
     | 
    
         
             
                def initialize(entries, kmin, kmax = kmin, kestimate = :regular)
         
     | 
| 
       8 
6 
     | 
    
         
             
                  @xcount = entries.size
         
     | 
| 
       9 
7 
     | 
    
         | 
| 
         @@ -26,301 +24,7 @@ module Ckmeans 
     | 
|
| 
       26 
24 
     | 
    
         
             
                      sorted_group_sizes.each_with_object([]) do |size, groups|
         
     | 
| 
       27 
25 
     | 
    
         
             
                        groups << @xsorted_original.shift(size)
         
     | 
| 
       28 
26 
     | 
    
         
             
                      end
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
            =begin # rubocop:disable Style/BlockComments
         
     | 
| 
       31 
     | 
    
         
            -
                      @cost   = Array.new(kmax) { Array.new(xcount) { 0.0 } }
         
     | 
| 
       32 
     | 
    
         
            -
                      @splits = Array.new(kmax) { Array.new(xcount) { 0 } }
         
     | 
| 
       33 
     | 
    
         
            -
                      @xsum   = Array.new(xcount)
         
     | 
| 
       34 
     | 
    
         
            -
                      @xsumsq = Array.new(xcount)
         
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
                      shift     = xsorted[xcount / 2]
         
     | 
| 
       37 
     | 
    
         
            -
                      xsum[0]   = xsorted[0].to_f - shift
         
     | 
| 
       38 
     | 
    
         
            -
                      xsumsq[0] = xsum[0]**2
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
                      1.upto(xcount - 1) do |i|
         
     | 
| 
       41 
     | 
    
         
            -
                        xf = xsorted[i].to_f
         
     | 
| 
       42 
     | 
    
         
            -
                        xsum[i]      = xsum[i - 1] + xf - shift
         
     | 
| 
       43 
     | 
    
         
            -
                        xsumsq[i]    = xsumsq[i - 1] + ((xf - shift) * (xf - shift))
         
     | 
| 
       44 
     | 
    
         
            -
                        cost[0][i]   = dissim(0, i)
         
     | 
| 
       45 
     | 
    
         
            -
                        splits[0][i] = 0
         
     | 
| 
       46 
     | 
    
         
            -
                      end
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
                      kmax_idx = kmax - 1
         
     | 
| 
       49 
     | 
    
         
            -
                      1.upto(kmax_idx) do |q|
         
     | 
| 
       50 
     | 
    
         
            -
                        imin = q < kmax_idx ? [1, q].max : xcount - 1
         
     | 
| 
       51 
     | 
    
         
            -
                        fill_row(q, imin, xcount - 1)
         
     | 
| 
       52 
     | 
    
         
            -
                      end
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                      kopt = koptimal
         
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                      puts "RB COST\n", cost.map(&:inspect)
         
     | 
| 
       57 
     | 
    
         
            -
                      puts "RB SPLITS\n", splits.map(&:inspect)
         
     | 
| 
       58 
     | 
    
         
            -
                      puts "RB K OPTIMAL: #{kopt}"
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
                      backtrack(kopt).each_with_object(Array.new(kopt)) do |(q, left, right), res|
         
     | 
| 
       61 
     | 
    
         
            -
                        res[q] = xsorted[left..right]
         
     | 
| 
       62 
     | 
    
         
            -
                      end
         
     | 
| 
       63 
     | 
    
         
            -
            =end
         
     | 
| 
       64 
     | 
    
         
            -
                    end
         
     | 
| 
       65 
     | 
    
         
            -
                end
         
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
                private
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
                # Readers for the state the helper methods of this class index into
                # (cost/splits are indexed as [q][i]; xsum/xsumsq/xsorted as [i]).
                attr_reader :cost, :splits, :xsum, :xsumsq, :xcount, :xsorted, :kmin, :kmax
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
                # Picks the cluster count k in kmin..kmax whose partition (recovered through
                # backtrack) gives the largest score
                # `2 * loglikelihood - (3k - 1) * log(n)`; on ties the smaller k is kept.
                #
                # Clusters with positive variance contribute a squared-deviation term plus
                # a per-cluster term; clusters with zero variance contribute
                # `size * log(1 / bin_width / n)` instead.
                #
                # Raises RuntimeError when a cluster's first value compares neither less
                # than nor equal to its last value.
                def koptimal # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
                  best_k       = kmin
                  total_points = xcount
                  best_bic     = 0.0
                  # Deviation from BIC formula to favor smaller clusters
                  size_weight  = kestimate == :sensitive ? 0.0 : 1.0

                  kmin.upto(kmax) do |k|
                    sizes = Array.new(k)
                    backtrack(k) { |q, left, right| sizes[q] = right - left + 1 }

                    loglik = 0.0
                    first  = 0

                    sizes.each do |count|
                      last     = first + count - 1
                      lo_value = xsorted[first]
                      hi_value = xsorted[last]

                      if lo_value < hi_value
                        bin_lo = lo_value
                        bin_hi = hi_value
                      elsif lo_value == hi_value
                        # Degenerate cluster: widen the bin to the midpoints with its neighbours.
                        bin_lo = first.zero? ? xsorted[0] : (xsorted[first - 1] + lo_value) / 2.0
                        bin_hi =
                          if last < total_points - 1
                            (hi_value + xsorted[last + 1]) / 2.0
                          else
                            xsorted[total_points - 1]
                          end
                      else
                        raise "ERROR: binLeft > binRight"
                      end

                      bin_width = bin_hi.to_f - bin_lo

                      mean, variance = shifted_data_variance(first, last)

                      if variance > 0
                        first.upto(last) do |i|
                          deviation = xsorted[i] - mean
                          loglik += -deviation * deviation / (2.0 * variance)
                        end

                        cluster_term =
                          (Math.log(count / total_points.to_f) * size_weight) -
                          (0.5 * Math.log(PI_DOUBLE * variance))
                        loglik += count * cluster_term
                      else
                        loglik += count * Math.log(1.0 / bin_width / total_points)
                      end

                      first = last + 1
                    end

                    bic = (2.0 * loglik) - (((3 * k) - 1) * Math.log(total_points.to_f))

                    # The first k always seeds the running maximum; later ones must beat it.
                    next unless k == kmin || bic > best_bic

                    best_bic = bic
                    best_k   = k
                  end

                  best_k
                end
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
                # Returns [mean, variance] for xsorted[ileft..iright].
                #
                # Values are shifted by the element at the middle index before the sums are
                # accumulated. The variance uses an (n - 1) denominator and stays 0.0 for a
                # single element; both results are 0.0 when iright < ileft.
                def shifted_data_variance(ileft, iright)
                  return [0.0, 0.0] if iright < ileft

                  count         = iright - ileft + 1
                  pivot         = xsorted[(ileft + iright) / 2].to_f
                  shifted_sum   = 0.0
                  shifted_sumsq = 0.0

                  (ileft..iright).each do |idx|
                    delta          = xsorted[idx] - pivot
                    shifted_sum   += delta
                    shifted_sumsq += delta**2
                  end

                  mean = (shifted_sum / count) + pivot
                  variance =
                    if count > 1
                      (shifted_sumsq - (shifted_sum * shifted_sum / count)) / (count - 1)
                    else
                      0.0
                    end

                  [mean, variance]
                end
         
     | 
| 
       154 
     | 
    
         
            -
             
     | 
| 
       155 
     | 
    
         
            -
                # Walks the splits table from cluster k - 1 down to cluster 0, yielding
                # (cluster_index, left, right) for each cluster, where left is read from
                # splits[cluster_index][right] and right starts at xcount - 1.
                # Returns an Enumerator when called without a block.
                def backtrack(k)
                  return enum_for(:backtrack, k) unless block_given?

                  upper = xcount - 1

                  (k - 1).downto(0) do |cluster|
                    lower = splits[cluster][upper]

                    yield cluster, lower, upper

                    # The next (lower-numbered) cluster ends just before this one starts.
                    upper = lower - 1 unless cluster.zero?
                  end
                end
         
     | 
| 
       169 
     | 
    
         
            -
             
     | 
| 
       170 
     | 
    
         
            -
                # Dissimilarity of the index range j..i, derived from the xsum / xsumsq
                # arrays as `sum_of_squares - sum**2 / size` and clamped below at 0.
                # Returns 0.0 when j >= i.
                # NOTE(review): xsum / xsumsq are used here as cumulative sums over
                # xsorted; they are built outside this block — confirm.
                def dissim(j, i)
                  return 0.0 if j >= i

                  spread =
                    if j <= 0
                      head_sum = xsum[i]
                      xsumsq[i] - (head_sum * head_sum / (i + 1))
                    else
                      span_sum = xsum[i] - xsum[j - 1]
                      span_len = i - j + 1
                      xsumsq[i] - xsumsq[j - 1] - (span_sum * span_sum / span_len)
                    end

                  # Guards against a negative result of the subtraction above.
                  [0, spread].max
                end
         
     | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
       185 
     | 
    
         
            -
                # Runs smawk for row q over columns imin..imax (step 1), offering every
                # index from q through imax as a candidate split.
                def fill_row(q, imin, imax)
                  candidates = Array.new(imax - q + 1) { |offset| q + offset }
                  smawk(imin, imax, 1, q, candidates)
                end
         
     | 
| 
       191 
     | 
    
         
            -
             
     | 
| 
       192 
     | 
    
         
            -
                # Recursive driver for filling row q at columns imin, imin + istep, ...
                # up to imax, using the candidate splits in js.
                #
                # With no more than one column left it delegates to
                # find_min_from_candidates. Otherwise it prunes the candidates, recurses
                # on every second column (starting at imin + istep, step doubled) and then
                # fills the remaining columns with fill_even_positions.
                def smawk(imin, imax, istep, q, js)
                  return find_min_from_candidates(q, imin, imax, istep, js) if imax <= imin

                  odd_candidates = prune_candidates(imin, imax, istep, q, js)
                  double_step    = istep * 2
                  first_odd      = imin + istep
                  # Largest column reachable from first_odd in steps of double_step.
                  last_odd       = first_odd + ((imax - first_odd) / double_step * double_step)

                  smawk(first_odd, last_odd, double_step, q, odd_candidates)
                  fill_even_positions(imin, imax, istep, q, js)
                end
         
     | 
| 
       205 
     | 
    
         
            -
             
     | 
| 
       206 
     | 
    
         
            -
                # For each column i in imin..imax (stepping by istep) writes cost[q][i] and
                # splits[q][i] by scanning the candidate splits in js.
                #
                # The scan for a column starts at the candidate position that last
                # improved a previous column, skips candidates below splits[q - 1][i],
                # and stops at the first candidate greater than i. Ties (<=) move to the
                # later candidate.
                def find_min_from_candidates(q, imin, imax, istep, js)
                  resume_at = 0

                  (imin..imax).step(istep) do |i|
                    seed_pos = resume_at
                    seed     = js[seed_pos]

                    cost[q][i]   = cost[q - 1][seed - 1] + dissim(seed, i)
                    splits[q][i] = seed

                    (seed_pos + 1).upto(js.size - 1) do |pos|
                      candidate = js[pos]

                      next if candidate < splits[q - 1][i]
                      break if candidate > i

                      total = cost[q - 1][candidate - 1] + dissim(candidate, i)

                      if total <= cost[q][i]
                        cost[q][i]   = total
                        splits[q][i] = candidate
                        resume_at    = pos
                      end
                    end
                  end
                end
         
     | 
| 
       231 
     | 
    
         
            -
             
     | 
| 
       232 
     | 
    
         
            -
                # Reduces the candidate list js to at most as many entries as there are
                # columns in imin..imax (stepping by istep).
                #
                # Returns js itself when it is already short enough; otherwise works on a
                # copy, repeatedly comparing the cost of two neighbouring candidates at
                # one column and discarding one of them until the count matches, then
                # compacts and truncates the copy.
                def prune_candidates(imin, imax, istep, q, js)
                  column_count = ((imax - imin) / istep) + 1
                  remaining    = js.size

                  return js if column_count >= remaining

                  kept   = js.dup
                  top    = -1 # index of the last candidate accepted so far
                  cursor = 0  # index of the candidate currently under test

                  until remaining <= column_count
                    slot = top + 1
                    i    = imin + (slot * istep)

                    current   = kept[cursor]
                    following = kept[cursor + 1]

                    current_cost   = cost[q - 1][current - 1] + dissim(current, i)
                    following_cost = cost[q - 1][following - 1] + dissim(following, i)
                    current_wins   = current_cost < following_cost

                    if current_wins && slot < column_count - 1
                      # Accept the current candidate and move on to the next one.
                      top += 1
                      kept[top] = current
                      cursor += 1
                    elsif current_wins && slot == column_count - 1
                      # Last column: drop the following candidate.
                      cursor += 1
                      kept[cursor] = current
                      remaining -= 1
                    else
                      if slot > 0
                        # Drop the current candidate and step back to the last accepted one.
                        kept[cursor] = kept[top]
                        top -= 1
                      else
                        cursor += 1
                      end

                      remaining -= 1
                    end
                  end

                  # Shift the untested tail down next to the accepted prefix.
                  (top + 1).upto(remaining - 1) do |target|
                    kept[target] = kept[cursor]
                    cursor += 1
                  end

                  kept.slice!(remaining..-1) if kept.size > remaining
                  kept
                end
         
     | 
| 
       278 
     | 
    
         
            -
             
     | 
| 
       279 
     | 
    
         
            -
                # Writes cost[q][i] and splits[q][i] for columns imin, imin + 2 * istep, ...
                # up to imax.
                #
                # For each such column the candidates scanned from js are limited from
                # below by the upper bound used for the previous column and from above by
                # min(splits[q][i + istep], i) (or min(last candidate, i) when i + istep is
                # past imax). Candidates below splits[q - 1][i] are skipped, and the scan
                # stops early once a candidate's base cost plus the dissimilarity at the
                # upper limit exceeds the best cost so far.
                def fill_even_positions(imin, imax, istep, q, js)
                  candidate_count = js.size
                  double_step     = istep * 2
                  lower_limit     = js[0]

                  i = imin
                  r = 0

                  until i > imax
                    r += 1 until js[r] >= lower_limit

                    seed = js[r]
                    cost[q][i]   = cost[q - 1][seed - 1] + dissim(seed, i)
                    splits[q][i] = seed

                    upper_limit = (i + istep) <= imax ? splits[q][i + istep] : js[candidate_count - 1]
                    scan_limit  = [upper_limit, i].min
                    limit_cost  = dissim(scan_limit, i)

                    r += 1

                    while r < candidate_count && js[r] <= scan_limit
                      candidate = js[r]

                      break if candidate > i

                      if candidate >= splits[q - 1][i]
                        base  = cost[q - 1][candidate - 1]
                        total = base + dissim(candidate, i)

                        if total <= cost[q][i]
                          cost[q][i]   = total
                          splits[q][i] = candidate
                        elsif base + limit_cost > cost[q][i]
                          break
                        end
                      end

                      r += 1
                    end

                    # Step back so the next column re-examines the last candidate reached.
                    r -= 1
                    lower_limit = upper_limit

                    i += double_step
                  end
                end
         
     | 
| 
       325 
29 
     | 
    
         
             
              end
         
     | 
| 
       326 
30 
     | 
    
         
             
            end
         
     | 
    
        data/lib/ckmeans/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: ckmeans
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.0.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Vlad Lebedev
         
     | 
| 
       8 
8 
     | 
    
         
             
            bindir: exe
         
     | 
| 
       9 
9 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       10 
     | 
    
         
            -
            date: 2025- 
     | 
| 
      
 10 
     | 
    
         
            +
            date: 2025-05-01 00:00:00.000000000 Z
         
     | 
| 
       11 
11 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       12 
12 
     | 
    
         
             
            description: Repeatable clustering of unidimensional data
         
     | 
| 
       13 
13 
     | 
    
         
             
            email:
         
     | 
| 
         @@ -17,11 +17,13 @@ extensions: 
     | 
|
| 
       17 
17 
     | 
    
         
             
            - ext/ckmeans/extconf.rb
         
     | 
| 
       18 
18 
     | 
    
         
             
            extra_rdoc_files: []
         
     | 
| 
       19 
19 
     | 
    
         
             
            files:
         
     | 
| 
      
 20 
     | 
    
         
            +
            - ".dockerignore"
         
     | 
| 
       20 
21 
     | 
    
         
             
            - ".rspec"
         
     | 
| 
       21 
22 
     | 
    
         
             
            - ".rubocop.yml"
         
     | 
| 
       22 
23 
     | 
    
         
             
            - ".rubocop_todo.yml"
         
     | 
| 
       23 
24 
     | 
    
         
             
            - ".ruby-version"
         
     | 
| 
       24 
25 
     | 
    
         
             
            - CHANGELOG.md
         
     | 
| 
      
 26 
     | 
    
         
            +
            - Dockerfile
         
     | 
| 
       25 
27 
     | 
    
         
             
            - LICENSE
         
     | 
| 
       26 
28 
     | 
    
         
             
            - README.md
         
     | 
| 
       27 
29 
     | 
    
         
             
            - Rakefile
         
     |