ckmeans 1.0.2 → 1.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: be110daab8039e8a76ccbc68808120caf3aa7b189e107d6bced6d3519e8d917c
4
- data.tar.gz: 6647bce619e675a4e24f4a17ec8e5aee23280cd97d89b29433da56e122f3c932
3
+ metadata.gz: 5ad7e8c24dd367d5e6a6dd66abc529ae92079cf99d1c781a7646c929547b0e62
4
+ data.tar.gz: 2e338ca878eba2d250ca61fff2ea8bee44ec8387b37e12b31600edf9da2b7130
5
5
  SHA512:
6
- metadata.gz: 54c6292bbae43afdbb4c618983c9a602ce68fec83239cf3a2f77c42fb8544fb45801d71910796db49cdf872a6151f616a711bce9c1fe537a202cae476bbb995b
7
- data.tar.gz: 82d39fb2a5870a92ab579f5342de64e69a170c1d9ef31e9d5e121fb06902d3a3dba1b6a5cd4b56b432524e991e53221b78ebee7c88bbbbea868ee63ccb256d0e
6
+ metadata.gz: 8c59e1e159cc9cada8afed9e016a5d8956cfe909bb7b7d82c8d155f388fdf1924a49072d37e52065fa643a539da3a192767eddb38da95b2c2524bcc7d0a39ebd
7
+ data.tar.gz: f2b535377d441bc1f2ee309a5466c8231b425aa0dd9b0512aa36257defa12b3b645694ae953b2b5e3b6997c50bde796e8fa1c2f8f10d4055b1cc9cb6abcf1353
data/.dockerignore ADDED
@@ -0,0 +1,13 @@
1
+ tmp
2
+
3
+ # Ignore compiled extension files
4
+ *.bundle
5
+ *.so
6
+ *.o
7
+ *.dll
8
+
9
+ # Ignore generated makefiles and compilation artifacts
10
+ ext/**/Makefile
11
+ ext/**/mkmf.log
12
+ ext/**/*.log
13
+ ext/**/tmp/
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [1.0.1] - 2025-04-24
4
+
5
+ - https://github.com/vlebedeff/rb-ckmeans/pull/9
6
+ - https://github.com/vlebedeff/rb-ckmeans/pull/8
7
+
8
+ ## [1.0.0] - 2025-04-22
9
+
10
+ - https://github.com/vlebedeff/rb-ckmeans/pull/6
11
+
3
12
  ## [0.1.2] - 2025-03-31
4
13
 
5
14
  - https://github.com/vlebedeff/rb-ckmeans/pull/3
data/Dockerfile ADDED
@@ -0,0 +1,11 @@
1
+ FROM public.ecr.aws/docker/library/ruby:3.2.2
2
+
3
+ RUN apt-get update && apt-get install -y build-essential ruby-dev
4
+
5
+ RUN gem install bundler -v 2.6.5
6
+
7
+ WORKDIR /opt/rb-ckmeans
8
+ COPY . .
9
+ RUN bundle install -j 12
10
+
11
+ ENTRYPOINT ["bundle", "exec"]
@@ -3,16 +3,18 @@
3
3
  #include <string.h>
4
4
  #include "ruby.h"
5
5
 
6
+ typedef long double LDouble;
7
+
6
8
  typedef struct Arena {
7
- uint32_t capacity;
8
- uint32_t offset;
9
+ size_t capacity;
10
+ size_t offset;
9
11
  uint8_t *buffer;
10
12
  } Arena;
11
13
 
12
14
  typedef struct MatrixF {
13
15
  uint32_t ncols;
14
16
  uint32_t nrows;
15
- long double *values;
17
+ LDouble *values;
16
18
  } MatrixF;
17
19
 
18
20
  typedef struct MatrixI {
@@ -22,12 +24,12 @@ typedef struct MatrixI {
22
24
  } MatrixI;
23
25
 
24
26
  typedef struct VectorF {
25
- uint32_t nvalues;
26
- long double *values;
27
+ uint32_t size;
28
+ LDouble *values;
27
29
  } VectorF;
28
30
 
29
31
  typedef struct VectorI {
30
- uint32_t nvalues;
32
+ uint32_t size;
31
33
  uint32_t *values;
32
34
  } VectorI;
33
35
 
@@ -52,69 +54,71 @@ typedef struct RowParams {
52
54
  } RowParams;
53
55
 
54
56
  typedef struct {
55
- long double mean;
56
- long double variance;
57
+ LDouble mean;
58
+ LDouble variance;
57
59
  } SegmentStats;
58
60
 
59
- VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
60
-
61
- Arena *arena_create(uint32_t);
62
- void *arena_alloc(Arena*, uint32_t);
63
- void arena_destroy(Arena*);
64
-
65
- MatrixF *matrix_create_f(Arena*, uint32_t, uint32_t);
66
- MatrixI *matrix_create_i(Arena*, uint32_t, uint32_t);
67
- void matrix_set_f(MatrixF*, uint32_t, uint32_t, long double value);
68
- long double matrix_get_f(MatrixF*, uint32_t, uint32_t);
69
- void matrix_inspect_f(MatrixF*);
70
- void matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
71
- uint32_t matrix_get_i(MatrixI*, uint32_t, uint32_t);
72
- void matrix_inspect_i(MatrixI*);
73
-
74
- VectorF *vector_create_f(Arena*, uint32_t);
75
- void vector_set_f(VectorF*, uint32_t offset, long double value);
76
- long double vector_get_f(VectorF*, uint32_t offset);
77
- long double vector_get_diff_f(VectorF*, uint32_t, uint32_t);
78
- void vector_inspect_f(VectorF*);
79
- VectorI *vector_create_i(Arena*, uint32_t);
80
- VectorI *vector_dup_i(VectorI*, Arena*);
81
- void vector_set_i(VectorI*, uint32_t offset, uint32_t value);
82
- uint32_t vector_get_i(VectorI*, uint32_t offset);
83
- void vector_downsize_i(VectorI*, uint32_t);
84
- void vector_inspect_i(VectorI*);
85
-
86
- long double dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
61
+ VALUE rb_ckmeans_sorted_group_sizes(VALUE self);
62
+
63
+ Arena *arena_create(size_t);
64
+ void *arena_alloc(Arena*, size_t);
65
+ void arena_destroy(Arena*);
66
+
67
+ MatrixF *matrix_create_f(Arena*, uint32_t, uint32_t);
68
+ MatrixI *matrix_create_i(Arena*, uint32_t, uint32_t);
69
+ void matrix_set_f(MatrixF*, uint32_t, uint32_t, LDouble value);
70
+ LDouble matrix_get_f(MatrixF*, uint32_t, uint32_t);
71
+ void matrix_inspect_f(MatrixF*);
72
+ void matrix_set_i(MatrixI*, uint32_t, uint32_t, uint32_t value);
73
+ uint32_t matrix_get_i(MatrixI*, uint32_t, uint32_t);
74
+ void matrix_inspect_i(MatrixI*);
75
+
76
+ VectorF *vector_create_f(Arena*, uint32_t);
77
+ void vector_set_f(VectorF*, uint32_t offset, LDouble value);
78
+ LDouble vector_get_f(VectorF*, uint32_t offset);
79
+ LDouble vector_get_diff_f(VectorF*, uint32_t, uint32_t);
80
+ void vector_inspect_f(VectorF*);
81
+ VectorI *vector_create_i(Arena*, uint32_t);
82
+ VectorI *vector_dup_i(VectorI*, Arena*);
83
+ void vector_set_i(VectorI*, uint32_t offset, uint32_t value);
84
+ uint32_t vector_get_i(VectorI*, uint32_t offset);
85
+ void vector_downsize_i(VectorI*, uint32_t);
86
+ void vector_inspect_i(VectorI*);
87
+
88
+ LDouble dissimilarity(uint32_t, uint32_t, VectorF*, VectorF*);
87
89
  void fill_row(State, uint32_t, uint32_t, uint32_t);
88
90
  void smawk(State, RowParams, VectorI*);
89
91
  void find_min_from_candidates(State, RowParams, VectorI*);
90
- VectorI *prune_candidates(State, RowParams, VectorI*);
92
+ VectorI *prune_candidates(State, RowParams, VectorI*);
91
93
  void fill_even_positions(State, RowParams, VectorI*);
92
94
  SegmentStats shifted_data_variance(VectorF*, uint32_t, uint32_t);
93
- VectorI *backtrack_sizes(State, uint32_t);
95
+ VectorI *backtrack_sizes(State, VectorI*, uint32_t);
94
96
  uint32_t find_koptimal(State);
95
97
 
96
98
  void Init_extensions(void) {
97
- VALUE ckmeans_module = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
99
+ VALUE ckmeans_module = rb_const_get(rb_cObject, rb_intern("Ckmeans"));
98
100
  VALUE clusterer_class = rb_const_get(ckmeans_module, rb_intern("Clusterer"));
99
101
 
100
102
  rb_define_private_method(clusterer_class, "sorted_group_sizes", rb_ckmeans_sorted_group_sizes, 0);
101
103
  }
102
104
 
103
- # define ARENA_MIN_CAPACITY 1024
104
- # define ALLOCATION_FACTOR 20
105
+ # define ARENA_MIN_CAPACITY 100
105
106
  # define PIx2 (M_PI * 2.0)
106
107
 
107
- VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
108
- VALUE rb_xcount = rb_ivar_get(self, rb_intern("@xcount"));
109
- VALUE rb_kmin = rb_ivar_get(self, rb_intern("@kmin"));
110
- VALUE rb_kmax = rb_ivar_get(self, rb_intern("@kmax"));
111
- VALUE rb_xsorted = rb_ivar_get(self, rb_intern("@xsorted"));
112
- VALUE rb_apply_bic_deviation = rb_ivar_get(self, rb_intern("@apply_bic_deviation"));
113
- uint32_t xcount = NUM2UINT(rb_xcount);
114
- uint32_t kmin = NUM2UINT(rb_kmin);
115
- uint32_t kmax = NUM2UINT(rb_kmax);
116
- bool apply_deviation = RTEST(rb_apply_bic_deviation);
117
- Arena *arena = arena_create(sizeof(int) * xcount * kmax * ALLOCATION_FACTOR);
108
+ VALUE rb_ckmeans_sorted_group_sizes(VALUE self)
109
+ {
110
+ uint32_t xcount = NUM2UINT(rb_iv_get(self, "@xcount"));
111
+ uint32_t kmin = NUM2UINT(rb_iv_get(self, "@kmin"));
112
+ uint32_t kmax = NUM2UINT(rb_iv_get(self, "@kmax"));
113
+ bool apply_deviation = RTEST(rb_iv_get(self, "@apply_bic_deviation"));
114
+ VALUE rb_xsorted = rb_iv_get(self, "@xsorted");
115
+
116
+ Arena *arena =
117
+ arena_create(
118
+ sizeof(LDouble) * xcount * (kmax + 4) +
119
+ sizeof(uint32_t) * xcount * kmax * 5 +
120
+ ARENA_MIN_CAPACITY
121
+ );
118
122
 
119
123
  if (arena == NULL) rb_raise(rb_eNoMemError, "Arena Memory Allocation Failed");
120
124
 
@@ -125,7 +129,7 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
125
129
  VectorF *xsumsq = vector_create_f(arena, xcount);
126
130
 
127
131
  for (uint32_t i = 0; i < xcount; i++) {
128
- long double xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
132
+ LDouble xi = NUM2DBL(rb_ary_entry(rb_xsorted, i));
129
133
  vector_set_f(xsorted, i, xi);
130
134
  }
131
135
 
@@ -143,17 +147,17 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
143
147
  };
144
148
 
145
149
 
146
- long double shift = vector_get_f(xsorted, xcount / 2);
147
- long double diff_initial = vector_get_f(xsorted, 0) - shift;
150
+ LDouble shift = vector_get_f(xsorted, xcount / 2);
151
+ LDouble diff_initial = vector_get_f(xsorted, 0) - shift;
148
152
 
149
153
  vector_set_f(xsum, 0, diff_initial);
150
154
  vector_set_f(xsumsq, 0, diff_initial * diff_initial);
151
155
 
152
156
  for (uint32_t i = 1; i < xcount; i++) {
153
- long double xi = vector_get_f(xsorted, i);
154
- long double xsum_prev = vector_get_f(xsum, i - 1);
155
- long double xsumsq_prev = vector_get_f(xsumsq, i - 1);
156
- long double diff = xi - shift;
157
+ LDouble xi = vector_get_f(xsorted, i);
158
+ LDouble xsum_prev = vector_get_f(xsum, i - 1);
159
+ LDouble xsumsq_prev = vector_get_f(xsumsq, i - 1);
160
+ LDouble diff = xi - shift;
157
161
 
158
162
  vector_set_f(xsum, i, xsum_prev + diff);
159
163
  vector_set_f(xsumsq, i, xsumsq_prev + diff * diff);
@@ -168,7 +172,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
168
172
 
169
173
  uint32_t koptimal = find_koptimal(state);
170
174
 
171
- VectorI *sizes = backtrack_sizes(state, koptimal);
175
+ VectorI *sizes = vector_create_i(arena, koptimal);
176
+ backtrack_sizes(state, sizes, koptimal);
172
177
 
173
178
  /* printf("XSORTED \t"); vector_inspect_f(xsorted); */
174
179
  /* printf("K OPTIMAL: %lld\n", koptimal); */
@@ -176,8 +181,8 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
176
181
  /* printf("FINAL COST\n"); matrix_inspect_f(cost); */
177
182
  /* printf("FINAL SPLITS\n"); matrix_inspect_i(splits); */
178
183
 
179
- VALUE response = rb_ary_new2(sizes->nvalues);
180
- for (uint32_t i = 0; i < sizes->nvalues; i++) {
184
+ VALUE response = rb_ary_new2(sizes->size);
185
+ for (uint32_t i = 0; i < sizes->size; i++) {
181
186
  VALUE size = LONG2NUM(vector_get_i(sizes, i));
182
187
  rb_ary_store(response, i, size);
183
188
  }
@@ -189,29 +194,30 @@ VALUE rb_ckmeans_sorted_group_sizes(VALUE self) {
189
194
 
190
195
  uint32_t find_koptimal(State state)
191
196
  {
192
- uint32_t kmin = state.kmin;
193
- uint32_t kmax = state.kmax;
194
- uint32_t xcount = state.xcount;
195
- uint32_t kopt = kmin;
196
- uint32_t xindex_max = state.xcount - 1;
197
- VectorF *xsorted = state.xsorted;
198
- long double x0 = vector_get_f(xsorted, 0);
199
- long double xn = vector_get_f(xsorted, xindex_max);
200
- long double max_bic = 0.0;
201
- long double adjustment = state.apply_deviation ? 0.0 : 1.0;
202
-
197
+ uint32_t kmin = state.kmin;
198
+ uint32_t kmax = state.kmax;
199
+ uint32_t xcount = state.xcount;
200
+ uint32_t kopt = kmin;
201
+ uint32_t xindex_max = state.xcount - 1;
202
+ VectorF *xsorted = state.xsorted;
203
+ LDouble x0 = vector_get_f(xsorted, 0);
204
+ LDouble xn = vector_get_f(xsorted, xindex_max);
205
+ LDouble max_bic = 0.0;
206
+ LDouble xcount_log = log((LDouble) xcount);
207
+
208
+ VectorI *sizes = vector_create_i(state.arena, kmax);
203
209
  for (uint32_t k = kmin; k <= kmax; k++) {
204
210
  uint32_t index_right, index_left = 0;
205
- long double bin_left, bin_right, loglikelihood = 0.0;
206
- VectorI *sizes = backtrack_sizes(state, k);
211
+ LDouble bin_left, bin_right, loglikelihood = 0.0;
212
+ backtrack_sizes(state, sizes, k);
207
213
 
208
214
  for (uint32_t kb = 0; kb < k; kb++) {
209
- uint32_t npoints = vector_get_i(sizes, kb);
210
- index_right = index_left + npoints - 1;
211
- long double xleft = vector_get_f(xsorted, index_left);
212
- long double xright = vector_get_f(xsorted, index_right);
213
- bin_left = xleft;
214
- bin_right = xright;
215
+ uint32_t npoints = vector_get_i(sizes, kb);
216
+ index_right = index_left + npoints - 1;
217
+ LDouble xleft = vector_get_f(xsorted, index_left);
218
+ LDouble xright = vector_get_f(xsorted, index_right);
219
+ bin_left = xleft;
220
+ bin_right = xright;
215
221
 
216
222
  if (xleft == xright) {
217
223
  bin_left = index_left == 0
@@ -222,18 +228,18 @@ uint32_t find_koptimal(State state)
222
228
  : xn;
223
229
  }
224
230
 
225
- long double bin_width = bin_right - bin_left;
226
- SegmentStats stats = shifted_data_variance(xsorted, index_left, index_right);
227
- long double mean = stats.mean;
228
- long double variance = stats.variance;
231
+ LDouble bin_width = bin_right - bin_left;
232
+ SegmentStats stats = shifted_data_variance(xsorted, index_left, index_right);
233
+ LDouble mean = stats.mean;
234
+ LDouble variance = stats.variance;
229
235
 
230
236
  if (variance > 0) {
231
237
  for (uint32_t i = index_left; i <= index_right; i++) {
232
- long double xi = vector_get_f(xsorted, i);
238
+ LDouble xi = vector_get_f(xsorted, i);
233
239
  loglikelihood += -(xi - mean) * (xi - mean) / (2.0 * variance);
234
240
  }
235
241
  loglikelihood += npoints * (
236
- (log(npoints / (long double) xcount) * adjustment) -
242
+ (state.apply_deviation ? 0.0 : log(npoints / (LDouble) xcount)) -
237
243
  (0.5 * log(PIx2 * variance))
238
244
  );
239
245
  } else {
@@ -243,24 +249,23 @@ uint32_t find_koptimal(State state)
243
249
  index_left = index_right + 1;
244
250
  }
245
251
 
246
- long double bic = (2.0 * loglikelihood) - (((3 * k) - 1) * log((long double) xcount));
252
+ LDouble bic = (2.0 * loglikelihood) - (((3 * k) - 1) * xcount_log);
247
253
 
248
254
  if (k == kmin) {
249
255
  max_bic = bic;
250
- kopt = kmin;
256
+ kopt = kmin;
251
257
  } else if (bic > max_bic) {
252
258
  max_bic = bic;
253
- kopt = k;
259
+ kopt = k;
254
260
  }
255
261
  }
256
262
 
257
263
  return kopt;
258
264
  }
259
265
 
260
- VectorI *backtrack_sizes(State state, uint32_t k)
266
+ VectorI *backtrack_sizes(State state, VectorI *sizes, uint32_t k)
261
267
  {
262
268
  MatrixI *splits = state.splits;
263
- VectorI *sizes = vector_create_i(state.arena, k);
264
269
  uint32_t xcount = state.xcount;
265
270
  uint32_t right = xcount - 1;
266
271
  uint32_t left = 0;
@@ -280,15 +285,15 @@ VectorI *backtrack_sizes(State state, uint32_t k)
280
285
  SegmentStats shifted_data_variance(VectorF *xsorted, uint32_t left, uint32_t right)
281
286
  {
282
287
  const uint32_t n = right - left + 1;
283
- long double sum = 0.0;
284
- long double sumsq = 0.0;
288
+ LDouble sum = 0.0;
289
+ LDouble sumsq = 0.0;
285
290
  SegmentStats stats = { .mean = 0.0, .variance = 0.0 };
286
291
 
287
292
  if (right >= left) {
288
- const long double median = vector_get_f(xsorted, (left + right) / 2);
293
+ const LDouble median = vector_get_f(xsorted, (left + right) / 2);
289
294
 
290
295
  for (uint32_t i = left; i <= right; i++) {
291
- const long double sumi = vector_get_f(xsorted, i) - median;
296
+ const LDouble sumi = vector_get_f(xsorted, i) - median;
292
297
 
293
298
  sum += sumi;
294
299
  sumsq += sumi * sumi;
@@ -341,7 +346,7 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
341
346
  uint32_t imin = rparams.imin;
342
347
  uint32_t imax = rparams.imax;
343
348
  uint32_t istep = rparams.istep;
344
- uint32_t n = split_candidates->nvalues;
349
+ uint32_t n = split_candidates->size;
345
350
  uint32_t istepx2 = istep * 2;
346
351
  uint32_t jl = vector_get_i(split_candidates, 0);
347
352
  VectorF *xsum = state.xsum;
@@ -351,22 +356,22 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
351
356
  for (uint32_t i = imin, r = 0; i <= imax; i += istepx2) {
352
357
  while (vector_get_i(split_candidates, r) < jl) r++;
353
358
 
354
- uint32_t rcandidate = vector_get_i(split_candidates, r);
359
+ uint32_t rcandidate = vector_get_i(split_candidates, r);
355
360
  uint32_t cost_base_row = row - 1;
356
361
  uint32_t cost_base_col = rcandidate - 1;
357
- long double cost =
362
+ LDouble cost =
358
363
  matrix_get_f(state.cost, cost_base_row, cost_base_col) + dissimilarity(rcandidate, i, xsum, xsumsq);
359
364
 
360
365
  matrix_set_f(state.cost, row, i, cost);
361
366
  matrix_set_i(state.splits, row, i, rcandidate);
362
367
 
363
- uint32_t jh =
368
+ uint32_t jh =
364
369
  (i + istep) <= imax
365
370
  ? matrix_get_i(splits, row, i + istep)
366
371
  : vector_get_i(split_candidates, n - 1);
367
372
 
368
- uint32_t jmax = jh < i ? jh : i;
369
- long double sjimin = dissimilarity(jmax, i, xsum, xsumsq);
373
+ uint32_t jmax = jh < i ? jh : i;
374
+ LDouble sjimin = dissimilarity(jmax, i, xsum, xsumsq);
370
375
 
371
376
  for (++r; r < n && vector_get_i(split_candidates, r) <= jmax; r++) {
372
377
  uint32_t jabs = vector_get_i(split_candidates, r);
@@ -374,9 +379,9 @@ void fill_even_positions(State state, RowParams rparams, VectorI *split_candidat
374
379
  if (jabs > i) break;
375
380
  if (jabs < matrix_get_i(splits, row - 1, i)) continue;
376
381
 
377
- long double cost_base = matrix_get_f(state.cost, row - 1, jabs - 1);
378
- long double sj = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
379
- long double cost_prev = matrix_get_f(state.cost, row, i);
382
+ LDouble cost_base = matrix_get_f(state.cost, row - 1, jabs - 1);
383
+ LDouble sj = cost_base + dissimilarity(jabs, i, xsum, xsumsq);
384
+ LDouble cost_prev = matrix_get_f(state.cost, row, i);
380
385
 
381
386
  if (sj <= cost_prev) {
382
387
  matrix_set_f(state.cost, row, i, sj);
@@ -407,19 +412,19 @@ void find_min_from_candidates(State state, RowParams rparams, VectorI *split_can
407
412
  const uint32_t optimal_split_idx = optimal_split_idx_prev;
408
413
  const uint32_t optimal_split = vector_get_i(split_candidates, optimal_split_idx);
409
414
  const uint32_t cost_prev = matrix_get_f(cost, row - 1, optimal_split - 1);
410
- const long double added_cost = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
415
+ const LDouble added_cost = dissimilarity(optimal_split, i, state.xsum, state.xsumsq);
411
416
 
412
417
  matrix_set_f(cost, row, i, cost_prev + added_cost);
413
418
  matrix_set_i(splits, row, i, optimal_split);
414
419
 
415
- for (uint32_t r = optimal_split_idx + 1; r < split_candidates->nvalues; r++)
420
+ for (uint32_t r = optimal_split_idx + 1; r < split_candidates->size; r++)
416
421
  {
417
422
  uint32_t split = vector_get_i(split_candidates, r);
418
423
 
419
424
  if (split < matrix_get_i(splits, row - 1, i)) continue;
420
425
  if (split > i) break;
421
426
 
422
- long double split_cost =
427
+ LDouble split_cost =
423
428
  matrix_get_f(cost, row - 1, split - 1) + dissimilarity(split, i, state.xsum, state.xsumsq);
424
429
 
425
430
  if (split_cost > matrix_get_f(cost, row, i)) continue;
@@ -437,7 +442,7 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida
437
442
  uint32_t row = rparams.row;
438
443
  uint32_t istep = rparams.istep;
439
444
  uint32_t n = ((rparams.imax - imin) / istep) + 1;
440
- uint32_t m = split_candidates->nvalues;
445
+ uint32_t m = split_candidates->size;
441
446
 
442
447
  if (n >= m) return split_candidates;
443
448
 
@@ -447,12 +452,12 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida
447
452
 
448
453
  while (m > n)
449
454
  {
450
- uint32_t i = imin + left * istep;
451
- uint32_t j = vector_get_i(pruned, right);
452
- uint32_t jnext = vector_get_i(pruned, right + 1);
453
- long double sl =
455
+ uint32_t i = imin + left * istep;
456
+ uint32_t j = vector_get_i(pruned, right);
457
+ uint32_t jnext = vector_get_i(pruned, right + 1);
458
+ LDouble sl =
454
459
  matrix_get_f(state.cost, row - 1, j - 1) + dissimilarity(j, i, state.xsum, state.xsumsq);
455
- long double snext =
460
+ LDouble snext =
456
461
  matrix_get_f(state.cost, row - 1, jnext - 1) + dissimilarity(jnext, i, state.xsum, state.xsumsq);
457
462
 
458
463
  if ((sl < snext) && (left < n - 1)) {
@@ -483,86 +488,86 @@ VectorI *prune_candidates(State state, RowParams rparams, VectorI *split_candida
483
488
  return pruned;
484
489
  }
485
490
 
486
- long double dissimilarity(uint32_t j, uint32_t i, VectorF *xsum, VectorF *xsumsq) {
487
- long double sji = 0.0;
491
+ inline LDouble dissimilarity(uint32_t j, uint32_t i, VectorF *restrict xsum, VectorF *restrict xsumsq) {
492
+ LDouble sji = 0.0;
488
493
 
489
494
  if (j >= i) return sji;
490
495
 
491
496
  if (j > 0) {
492
- long double segment_diff = vector_get_diff_f(xsum, i, j - 1);
493
- uint32_t segment_size = i - j + 1;
494
- sji = vector_get_diff_f(xsumsq, i, j - 1) - (segment_diff * segment_diff / segment_size);
497
+ LDouble segment_diff = vector_get_diff_f(xsum, i, j - 1);
498
+ uint32_t segment_size = i - j + 1;
499
+ sji = vector_get_diff_f(xsumsq, i, j - 1) - (segment_diff * segment_diff / segment_size);
495
500
  } else {
496
- long double xsumi = vector_get_f(xsum, i);
497
- sji = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
501
+ LDouble xsumi = vector_get_f(xsum, i);
502
+ sji = vector_get_f(xsumsq, i) - (xsumi * xsumi / (i + 1));
498
503
  }
499
504
 
500
505
  return (sji > 0) ? sji : 0.0;
501
506
  }
502
507
 
503
- VectorF *vector_create_f(Arena *arena, uint32_t nvalues) {
508
+ inline VectorF *vector_create_f(Arena *arena, uint32_t size) {
504
509
  VectorF *v;
505
510
 
506
- v = arena_alloc(arena, sizeof(*v));
507
- v->values = arena_alloc(arena, sizeof(*(v->values)) * nvalues);
508
- v->nvalues = nvalues;
511
+ v = arena_alloc(arena, sizeof(*v));
512
+ v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
513
+ v->size = size;
509
514
 
510
515
  return v;
511
516
  }
512
517
 
513
- VectorI *vector_create_i(Arena *arena, uint32_t nvalues) {
518
+ inline VectorI *vector_create_i(Arena *arena, uint32_t size) {
514
519
  VectorI *v;
515
520
 
516
- v = arena_alloc(arena, sizeof(*v));
517
- v->values = arena_alloc(arena, sizeof(*(v->values)) * nvalues);
518
- v->nvalues = nvalues;
521
+ v = arena_alloc(arena, sizeof(*v));
522
+ v->values = arena_alloc(arena, sizeof(*(v->values)) * size);
523
+ v->size = size;
519
524
 
520
525
  return v;
521
526
  }
522
527
 
523
- VectorI *vector_dup_i(VectorI *v, Arena *arena)
528
+ inline VectorI *vector_dup_i(VectorI *v, Arena *arena)
524
529
  {
525
- VectorI *vdup = vector_create_i(arena, v->nvalues);
530
+ VectorI *vdup = vector_create_i(arena, v->size);
526
531
 
527
- memcpy(vdup->values, v->values, sizeof(*(v->values)) * v->nvalues);
532
+ memcpy(vdup->values, v->values, sizeof(*(v->values)) * v->size);
528
533
 
529
534
  return vdup;
530
535
  }
531
536
 
532
- void vector_set_f(VectorF *v, uint32_t offset, long double value) {
537
+ inline void vector_set_f(VectorF *v, uint32_t offset, LDouble value) {
533
538
  *(v->values + offset) = value;
534
539
  }
535
540
 
536
- void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
541
+ inline void vector_set_i(VectorI *v, uint32_t offset, uint32_t value) {
537
542
  *(v->values + offset) = value;
538
543
  }
539
544
 
540
- uint32_t vector_get_i(VectorI *v, uint32_t offset) {
545
+ inline uint32_t vector_get_i(VectorI *v, uint32_t offset) {
541
546
  return *(v->values + offset);
542
547
  }
543
548
 
544
- void vector_downsize_i(VectorI *v, uint32_t new_size) {
545
- v->nvalues = new_size;
549
+ inline void vector_downsize_i(VectorI *v, uint32_t new_size) {
550
+ v->size = new_size;
546
551
  }
547
552
 
548
553
  void vector_inspect_i(VectorI *v) {
549
- for (uint32_t i = 0; i < v->nvalues - 1; i++)
554
+ for (uint32_t i = 0; i < v->size - 1; i++)
550
555
  printf("%u, ", vector_get_i(v, i));
551
- printf("%u\n", vector_get_i(v, v->nvalues - 1));
556
+ printf("%u\n", vector_get_i(v, v->size - 1));
552
557
  }
553
558
 
554
- long double vector_get_f(VectorF *v, uint32_t offset) {
559
+ inline LDouble vector_get_f(VectorF *v, uint32_t offset) {
555
560
  return *(v->values + offset);
556
561
  }
557
562
 
558
- long double vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
563
+ inline LDouble vector_get_diff_f(VectorF *v, uint32_t i, uint32_t j) {
559
564
  return *(v->values + i) - *(v->values + j);
560
565
  }
561
566
 
562
567
  void vector_inspect_f(VectorF *v) {
563
- for (uint32_t i = 0; i < v->nvalues - 1; i++)
568
+ for (uint32_t i = 0; i < v->size - 1; i++)
564
569
  printf("%Lf, ", vector_get_f(v, i));
565
- printf("%Lf\n", vector_get_f(v, v->nvalues - 1));
570
+ printf("%Lf\n", vector_get_f(v, v->size - 1));
566
571
  }
567
572
 
568
573
  MatrixF *matrix_create_f(Arena *arena, uint32_t nrows, uint32_t ncols) {
@@ -587,12 +592,12 @@ MatrixI *matrix_create_i(Arena *arena, uint32_t nrows, uint32_t ncols) {
587
592
  return m;
588
593
  }
589
594
 
590
- void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, long double value) {
595
+ inline void matrix_set_f(MatrixF *m, uint32_t i, uint32_t j, LDouble value) {
591
596
  uint32_t offset = i * m->ncols + j;
592
597
  *(m->values + offset) = value;
593
598
  }
594
599
 
595
- long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
600
+ inline LDouble matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
596
601
  uint32_t offset = i * m->ncols + j;
597
602
  return *(m->values + offset);
598
603
  }
@@ -600,7 +605,7 @@ long double matrix_get_f(MatrixF *m, uint32_t i, uint32_t j) {
600
605
  void matrix_inspect_f(MatrixF *m) {
601
606
  for (uint32_t i = 0; i < m->nrows; i++) {
602
607
  for (uint32_t j = 0; j < m->ncols - 1; j++) {
603
- long double value = matrix_get_f(m, i, j);
608
+ LDouble value = matrix_get_f(m, i, j);
604
609
 
605
610
  printf("%Lf, ", value);
606
611
  }
@@ -616,17 +621,17 @@ void matrix_inspect_i(MatrixI *m) {
616
621
  }
617
622
  }
618
623
 
619
- void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
624
+ inline void matrix_set_i(MatrixI *m, uint32_t i, uint32_t j, uint32_t value) {
620
625
  uint32_t offset = i * m->ncols + j;
621
626
  *(m->values + offset) = value;
622
627
  }
623
628
 
624
- uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
629
+ inline uint32_t matrix_get_i(MatrixI *m, uint32_t i, uint32_t j) {
625
630
  uint32_t offset = i * m->ncols + j;
626
631
  return *(m->values + offset);
627
632
  }
628
633
 
629
- Arena *arena_create(uint32_t capacity) {
634
+ Arena *arena_create(size_t capacity) {
630
635
  if (capacity < ARENA_MIN_CAPACITY) {
631
636
  capacity = ARENA_MIN_CAPACITY;
632
637
  }
@@ -654,7 +659,7 @@ Arena *arena_create(uint32_t capacity) {
654
659
  return arena;
655
660
  }
656
661
 
657
- void *arena_alloc(Arena *arena, uint32_t size) {
662
+ void *arena_alloc(Arena *arena, size_t size) {
658
663
  size = (size + 7) & ~7;
659
664
 
660
665
  if (arena->offset + size > arena->capacity) {
@@ -669,7 +674,8 @@ void *arena_alloc(Arena *arena, uint32_t size) {
669
674
  }
670
675
 
671
676
  void arena_destroy(Arena *arena) {
672
- /* printf("[Arena Destroy] Capacity: %u, offset: %u, left: %u\n", arena->capacity, arena->offset, arena->capacity - arena->offset); */
677
+ /* double leftover = ((double) arena->capacity - arena->offset) / arena->capacity * 100; */
678
+ /* printf("[Arena Destroy] Capacity: %zu, offset: %zu, left: %2.2f%%\n", arena->capacity, arena->offset, leftover); */
673
679
  free(arena->buffer);
674
680
  free(arena);
675
681
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ckmeans
4
- VERSION = "1.0.2"
4
+ VERSION = "1.0.3"
5
5
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ckmeans
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vlad Lebedev
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-04-24 00:00:00.000000000 Z
10
+ date: 2025-05-01 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Repeatable clustering of unidimensional data
13
13
  email:
@@ -17,11 +17,13 @@ extensions:
17
17
  - ext/ckmeans/extconf.rb
18
18
  extra_rdoc_files: []
19
19
  files:
20
+ - ".dockerignore"
20
21
  - ".rspec"
21
22
  - ".rubocop.yml"
22
23
  - ".rubocop_todo.yml"
23
24
  - ".ruby-version"
24
25
  - CHANGELOG.md
26
+ - Dockerfile
25
27
  - LICENSE
26
28
  - README.md
27
29
  - Rakefile