monotonic_grouper 1.0.2 → 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e62903ce3c859e0afe9812bcd66fb9125e92f730b8bb8bd0b368c6ace2ab385
4
- data.tar.gz: c0a730777117df24089ccfbb799c152a54822ca1be1b9f7f84e27699d83d4fce
3
+ metadata.gz: a150a2735dc99df4715193f8075ad6a739723c170a25bc9ae61d628e50a8661f
4
+ data.tar.gz: eaa156e8d8f882969a5f9b07f54e4353c996a7adaf22181061e7ff80d0e7309e
5
5
  SHA512:
6
- metadata.gz: ad1a76ec922ef77511a8b7ed5ecc9fbd5325411c9e5cfec491ae0ba651026486e550f0242125c5ddb0d5f4d05b8b1b23ae608efe7d86f1f0a8860981783a7c4c
7
- data.tar.gz: ddfeddfebb196f6140631ebc492c83704191e75399f2a8586c1a1990d6829213c368796b0fcabe189753a4b187e22ed02d46f6f3b99e41609ae8099acc6e73eb
6
+ metadata.gz: 884a86506cff344b18254dbbaccb73ac5311ea98031995a6ada6e2b299e93c03170b0b2e3e513bfdd878a36769441fc043c390e41985011fa1db04cb86995f72
7
+ data.tar.gz: f2675721e13770fafbd540b82ad6d5b9f33efd1ede32e080b26450d2b5088177b831d985e52bd24aa9f9dbcfdb9ae2ac37df74b0755c756de2a9bdd7a148ba6c
data/README.md CHANGED
@@ -11,14 +11,15 @@ Fast C extension for grouping monotonic sequences in Ruby arrays. Groups consecu
11
11
  - 💎 **Ruby-friendly**: Seamless integration as Array method
12
12
  - 🚀 **Optimized Date Processing**: Special fast path for Date objects with cached Julian Day calculations
13
13
 
14
- ## Recent Updates (v1.0.2)
14
+ ## Recent Updates (v1.0.3 - Stable)
15
15
 
16
- ### Bug Fixes
17
- - **Critical**: Fixed first element being incorrectly skipped in all processing paths
18
- - Fixed loop initialization bug that caused incorrect grouping results
16
+ ### Bug Fixes (v1.0.3)
17
+ - **Critical**: Fixed `group_monotonic` method not being available on Array class in external applications
18
+ - Properly encapsulated the method in Array class through C extension
19
19
 
20
- ### Performance Improvements
21
- - **2x faster Date processing**: Optimized by caching Julian Day Numbers (only 1 Ruby call per iteration instead of 2)
20
+ ### Previous Updates (v1.0.2)
21
+ - **Critical**: Fixed first element being incorrectly skipped in all processing paths
22
+ - **2x faster Date processing**: Optimized by caching Julian Day Numbers
22
23
  - Faster array access using `RARRAY_AREF` macro
23
24
  - Better memory preallocation
24
25
  - Improved Date subclass handling (DateTime, custom Date classes)
@@ -42,15 +43,15 @@ require 'monotonic_grouper'
42
43
 
43
44
  # Basic integer usage
44
45
  [1, 2, 3, 4, 5, 10, 11, 12].group_monotonic(3)
45
- # => [2..5, 10..12]
46
+ # => [1..5, 10..12]
46
47
 
47
48
  # With singles (sequences shorter than min_range_size)
48
49
  [1, 2, 3, 4, 7, 9, 10, 11, 12].group_monotonic(3)
49
- # => [2..4, 7, 9, 10..12]
50
+ # => [1..4, 7, 9, 10..12]
50
51
 
51
52
  # Custom minimum range size
52
53
  [1, 2, 3, 5, 6, 8].group_monotonic(2)
53
- # => [2..3, 5..6, 8]
54
+ # => [1..3, 5..6, 8]
54
55
 
55
56
  # Works with Dates
56
57
  require 'date'
@@ -63,12 +64,12 @@ dates = [
63
64
  Date.new(2024, 1, 7)
64
65
  ]
65
66
  dates.group_monotonic(3)
66
- # => [Date.new(2024, 1, 2)..Date.new(2024, 1, 3),
67
+ # => [Date.new(2024, 1, 1)..Date.new(2024, 1, 3),
67
68
  # Date.new(2024, 1, 5)..Date.new(2024, 1, 7)]
68
69
 
69
70
  # Works with characters
70
71
  ['a', 'b', 'c', 'd', 'f', 'g', 'h'].group_monotonic(3)
71
- # => ['b'..'d', 'f'..'h']
72
+ # => ['a'..'d', 'f'..'h']
72
73
  ```
73
74
 
74
75
  ## API
@@ -5,200 +5,220 @@
5
5
  #define RB_BIGNUM_TYPE_P(obj) (RB_TYPE_P((obj), T_BIGNUM))
6
6
  #endif
7
7
 
8
+ static inline void seal_array_len(VALUE ary, VALUE *buf, long pos) {
9
+ if (pos == 0)
10
+ return;
11
+ rb_ary_store(ary, pos - 1, buf[pos - 1]);
12
+ }
13
+
8
14
  static VALUE rb_mMonotonicGrouper;
9
15
  static VALUE rb_cDate;
10
16
  static ID id_succ;
11
- static ID id_eq;
12
17
  static ID id_jd;
13
18
 
14
- static inline int
15
- is_next_integer(VALUE a, VALUE b)
16
- {
17
- if (FIXNUM_P(a) && FIXNUM_P(b)) {
18
- long av = FIX2LONG(a);
19
- long bv = FIX2LONG(b);
20
- return bv == av + 1;
21
- }
22
- return 0;
23
- }
19
+ #define CHECK_ARRAY_MUTATION(ary, expected_len) \
20
+ do { \
21
+ if (RARRAY_LEN(ary) != (expected_len)) { \
22
+ rb_raise(rb_eRuntimeError, "array was modified during iteration"); \
23
+ } \
24
+ } while (0)
24
25
 
25
- static inline int
26
- is_next_in_sequence_generic(VALUE a, VALUE b)
27
- {
28
- VALUE succ_a = rb_funcall(a, id_succ, 0);
29
- return RTEST(rb_funcall(b, id_eq, 1, succ_a));
30
- }
31
-
32
- static void
33
- add_group_to_result_integer(VALUE result, VALUE group_start, VALUE group_end, long size, long min_range_size)
34
- {
35
- if (size >= min_range_size) {
36
- VALUE range = rb_range_new(group_start, group_end, 0);
37
- rb_ary_push(result, range);
38
- } else {
39
- long j;
40
- long start_val = FIX2LONG(group_start);
41
- for (j = 0; j < size; j++) {
42
- rb_ary_push(result, LONG2FIX(start_val + j));
43
- }
44
- }
45
- }
26
+ static VALUE process_fixnum_array(VALUE self, long len, long min_range_size) {
27
+ const VALUE *ptr = RARRAY_CONST_PTR(self);
28
+ const VALUE *end_ptr = ptr + len;
46
29
 
47
- static void
48
- add_group_to_result_date(VALUE result, VALUE group_start, VALUE group_end, long size, long min_range_size)
49
- {
50
- if (size >= min_range_size) {
51
- VALUE range = rb_range_new(group_start, group_end, 0);
52
- rb_ary_push(result, range);
53
- } else {
54
- long j;
55
- VALUE curr = group_start;
56
- rb_ary_push(result, curr);
57
- for (j = 1; j < size; j++) {
58
- curr = rb_funcall(curr, id_succ, 0);
59
- rb_ary_push(result, curr);
60
- }
61
- }
62
- }
63
-
64
- static void
65
- add_group_to_result_generic(VALUE result, VALUE group_start, VALUE group_end, long size, long min_range_size)
66
- {
67
- if (size >= min_range_size) {
68
- VALUE range = rb_range_new(group_start, group_end, 0);
69
- rb_ary_push(result, range);
70
- } else {
71
- long j;
72
- VALUE curr = group_start;
73
- rb_ary_push(result, curr);
74
- for (j = 1; j < size; j++) {
75
- curr = rb_funcall(curr, id_succ, 0);
76
- rb_ary_push(result, curr);
77
- }
78
- }
79
- }
80
-
81
- static VALUE
82
- process_integer_array(VALUE self, long len, long min_range_size)
83
- {
84
30
  VALUE result = rb_ary_new_capa(len);
85
- VALUE first_elem = RARRAY_AREF(self, 0);
86
- VALUE group_start = first_elem;
87
- VALUE group_end = first_elem;
88
- VALUE prev_value = first_elem;
89
- long current_size = 1;
90
- long i;
31
+ VALUE *out = RARRAY_PTR(result);
32
+ long pos = 0;
91
33
 
92
- for (i = 1; i < len; i++) {
93
- VALUE curr_value = RARRAY_AREF(self, i);
34
+ long group_start = FIX2LONG(*ptr);
35
+ long prev = group_start;
36
+ const VALUE *p;
94
37
 
95
- if (!FIXNUM_P(curr_value) && !RB_BIGNUM_TYPE_P(curr_value)) {
38
+ for (p = ptr + 1; p < end_ptr; p++) {
39
+ VALUE raw = *p;
40
+
41
+ if (!FIXNUM_P(raw)) {
96
42
  rb_raise(rb_eTypeError, "All elements must be of the same type");
97
43
  }
98
44
 
99
- if (is_next_integer(prev_value, curr_value)) {
100
- group_end = curr_value;
101
- current_size++;
45
+ long curr = FIX2LONG(raw);
46
+
47
+ if (curr == prev + 1) {
48
+ prev = curr;
102
49
  } else {
103
- add_group_to_result_integer(result, group_start, group_end, current_size, min_range_size);
104
- group_start = curr_value;
105
- group_end = curr_value;
106
- current_size = 1;
50
+ long size = prev - group_start + 1;
51
+ if (size >= min_range_size) {
52
+ out[pos++] = rb_range_new(LONG2FIX(group_start), LONG2FIX(prev), 0);
53
+ } else {
54
+ long v;
55
+ for (v = group_start; v <= prev; v++) {
56
+ out[pos++] = LONG2FIX(v);
57
+ }
58
+ }
59
+ group_start = curr;
60
+ prev = curr;
107
61
  }
108
-
109
- prev_value = curr_value;
110
62
  }
111
63
 
112
- add_group_to_result_integer(result, group_start, group_end, current_size, min_range_size);
64
+ {
65
+ long size = prev - group_start + 1;
66
+ if (size >= min_range_size) {
67
+ out[pos++] = rb_range_new(LONG2FIX(group_start), LONG2FIX(prev), 0);
68
+ } else {
69
+ long v;
70
+ for (v = group_start; v <= prev; v++) {
71
+ out[pos++] = LONG2FIX(v);
72
+ }
73
+ }
74
+ }
113
75
 
76
+ seal_array_len(result, out, pos);
114
77
  return result;
115
78
  }
116
79
 
117
- static VALUE
118
- process_date_array(VALUE self, long len, long min_range_size, VALUE first_elem)
119
- {
80
+ static VALUE process_date_array(VALUE self, long len, long min_range_size, VALUE first_elem) {
120
81
  VALUE result = rb_ary_new_capa(len);
82
+ VALUE *out = RARRAY_PTR(result);
83
+ long pos = 0;
84
+
121
85
  VALUE group_start = first_elem;
122
- VALUE group_end = first_elem;
86
+ VALUE prev_value = first_elem;
123
87
  VALUE first_class = CLASS_OF(first_elem);
124
- long current_size = 1;
125
- long i;
126
-
127
- /* Cache the jd of the previous element to avoid double rb_funcall */
128
88
  long prev_jd = NUM2LONG(rb_funcall(first_elem, id_jd, 0));
89
+ long group_start_jd = prev_jd;
90
+ long i;
129
91
 
130
92
  for (i = 1; i < len; i++) {
93
+ CHECK_ARRAY_MUTATION(self, len);
94
+
131
95
  VALUE curr_value = RARRAY_AREF(self, i);
96
+
132
97
  if (CLASS_OF(curr_value) != first_class) {
133
98
  rb_raise(rb_eTypeError, "All elements must be of the same type");
134
99
  }
135
100
 
136
101
  long curr_jd = NUM2LONG(rb_funcall(curr_value, id_jd, 0));
137
-
102
+ CHECK_ARRAY_MUTATION(self, len);
103
+
138
104
  if (curr_jd == prev_jd + 1) {
139
- group_end = curr_value;
140
- current_size++;
105
+ prev_value = curr_value;
106
+ prev_jd = curr_jd;
141
107
  } else {
142
- add_group_to_result_date(result, group_start, group_end, current_size, min_range_size);
108
+ long size = prev_jd - group_start_jd + 1;
109
+ if (size >= min_range_size) {
110
+ out[pos++] = rb_range_new(group_start, prev_value, 0);
111
+ } else {
112
+ VALUE curr = group_start;
113
+ out[pos++] = curr;
114
+ long j;
115
+ for (j = 1; j < size; j++) {
116
+ curr = rb_funcall(curr, id_succ, 0);
117
+ CHECK_ARRAY_MUTATION(self, len);
118
+ out[pos++] = curr;
119
+ }
120
+ }
143
121
  group_start = curr_value;
144
- group_end = curr_value;
145
- current_size = 1;
122
+ group_start_jd = curr_jd;
123
+ prev_value = curr_value;
124
+ prev_jd = curr_jd;
146
125
  }
147
-
148
- prev_jd = curr_jd;
149
126
  }
150
127
 
151
- add_group_to_result_date(result, group_start, group_end, current_size, min_range_size);
128
+ {
129
+ long size = prev_jd - group_start_jd + 1;
130
+ if (size >= min_range_size) {
131
+ out[pos++] = rb_range_new(group_start, prev_value, 0);
132
+ } else {
133
+ VALUE curr = group_start;
134
+ out[pos++] = curr;
135
+ long j;
136
+ for (j = 1; j < size; j++) {
137
+ curr = rb_funcall(curr, id_succ, 0);
138
+ CHECK_ARRAY_MUTATION(self, len);
139
+ out[pos++] = curr;
140
+ }
141
+ }
142
+ }
152
143
 
144
+ seal_array_len(result, out, pos);
153
145
  return result;
154
146
  }
155
147
 
156
- static VALUE
157
- process_generic_array(VALUE self, long len, long min_range_size, VALUE first_elem)
158
- {
148
+ static VALUE process_generic_array(VALUE self, long len, long min_range_size, VALUE first_elem) {
159
149
  VALUE result = rb_ary_new_capa(len);
150
+ VALUE *out = RARRAY_PTR(result);
151
+ long pos = 0;
152
+
160
153
  VALUE group_start = first_elem;
161
- VALUE group_end = first_elem;
162
154
  VALUE prev_value = first_elem;
163
155
  VALUE first_class = CLASS_OF(first_elem);
164
156
  long current_size = 1;
165
157
  long i;
166
158
 
167
159
  for (i = 1; i < len; i++) {
160
+ CHECK_ARRAY_MUTATION(self, len);
161
+
168
162
  VALUE curr_value = RARRAY_AREF(self, i);
163
+
169
164
  if (CLASS_OF(curr_value) != first_class) {
170
165
  rb_raise(rb_eTypeError, "All elements must be of the same type");
171
166
  }
172
167
 
173
- if (is_next_in_sequence_generic(prev_value, curr_value)) {
174
- group_end = curr_value;
168
+ VALUE succ_prev = rb_funcall(prev_value, id_succ, 0);
169
+ CHECK_ARRAY_MUTATION(self, len);
170
+
171
+ if (RTEST(rb_equal(curr_value, succ_prev))) {
175
172
  current_size++;
176
173
  } else {
177
- add_group_to_result_generic(result, group_start, group_end, current_size, min_range_size);
174
+ if (current_size >= min_range_size) {
175
+ out[pos++] = rb_range_new(group_start, prev_value, 0);
176
+ } else {
177
+ VALUE curr = group_start;
178
+ out[pos++] = curr;
179
+ long j;
180
+ for (j = 1; j < current_size; j++) {
181
+ curr = rb_funcall(curr, id_succ, 0);
182
+ CHECK_ARRAY_MUTATION(self, len);
183
+ out[pos++] = curr;
184
+ }
185
+ }
178
186
  group_start = curr_value;
179
- group_end = curr_value;
180
187
  current_size = 1;
181
188
  }
182
189
 
183
190
  prev_value = curr_value;
184
191
  }
185
192
 
186
- add_group_to_result_generic(result, group_start, group_end, current_size, min_range_size);
193
+ if (current_size >= min_range_size) {
194
+ out[pos++] = rb_range_new(group_start, prev_value, 0);
195
+ } else {
196
+ VALUE curr = group_start;
197
+ out[pos++] = curr;
198
+ long j;
199
+ for (j = 1; j < current_size; j++) {
200
+ curr = rb_funcall(curr, id_succ, 0);
201
+ CHECK_ARRAY_MUTATION(self, len);
202
+ out[pos++] = curr;
203
+ }
204
+ }
187
205
 
206
+ seal_array_len(result, out, pos);
188
207
  return result;
189
208
  }
190
209
 
191
- static VALUE
192
- rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
193
- {
194
- VALUE min_range_size_val;
210
+ static VALUE rb_array_group_monotonic(int argc, VALUE *argv, VALUE self) {
195
211
  long min_range_size;
196
212
  long len;
197
213
  VALUE first_elem;
198
- VALUE first_class;
199
214
 
200
- rb_scan_args(argc, argv, "01", &min_range_size_val);
201
- min_range_size = NIL_P(min_range_size_val) ? 3 : NUM2LONG(min_range_size_val);
215
+ if (argc == 0) {
216
+ min_range_size = 3;
217
+ } else if (argc == 1) {
218
+ min_range_size = NUM2LONG(argv[0]);
219
+ } else {
220
+ rb_raise(rb_eArgError, "wrong number of arguments (given %d, expected 0..1)", argc);
221
+ }
202
222
 
203
223
  if (min_range_size < 1) {
204
224
  rb_raise(rb_eArgError, "min_range_size must be at least 1");
@@ -211,19 +231,15 @@ rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
211
231
  }
212
232
 
213
233
  first_elem = RARRAY_AREF(self, 0);
214
- first_class = CLASS_OF(first_elem);
215
234
 
216
- /* Fast path for integers */
217
- if (FIXNUM_P(first_elem) || RB_BIGNUM_TYPE_P(first_elem)) {
218
- return process_integer_array(self, len, min_range_size);
235
+ if (FIXNUM_P(first_elem)) {
236
+ return process_fixnum_array(self, len, min_range_size);
219
237
  }
220
238
 
221
- /* Fast path for Date objects - use rb_obj_is_kind_of for safer checking */
222
239
  if (rb_cDate != Qnil && rb_obj_is_kind_of(first_elem, rb_cDate)) {
223
240
  return process_date_array(self, len, min_range_size, first_elem);
224
241
  }
225
242
 
226
- /* Generic path for other Comparable types */
227
243
  if (!rb_respond_to(first_elem, id_succ)) {
228
244
  rb_raise(rb_eTypeError, "Elements must respond to :succ method");
229
245
  }
@@ -231,30 +247,20 @@ rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
231
247
  return process_generic_array(self, len, min_range_size, first_elem);
232
248
  }
233
249
 
234
- static VALUE
235
- get_date_class(VALUE obj)
236
- {
250
+ static VALUE get_date_class(VALUE obj) {
237
251
  return rb_const_get(obj, rb_intern("Date"));
238
252
  }
239
253
 
240
- void
241
- Init_monotonic_grouper(void)
242
- {
254
+ void Init_monotonic_grouper(void) {
243
255
  int state = 0;
244
-
245
256
  id_succ = rb_intern("succ");
246
- id_eq = rb_intern("==");
247
257
  id_jd = rb_intern("jd");
248
-
249
258
  rb_mMonotonicGrouper = rb_define_module("MonotonicGrouper");
250
-
251
- /* Try to get Date class if it's loaded - use rb_protect to handle NameError */
252
259
  rb_cDate = rb_protect(get_date_class, rb_cObject, &state);
253
260
  if (state != 0) {
254
- /* Date class not found, will use generic path */
255
261
  rb_cDate = Qnil;
256
- rb_set_errinfo(Qnil); /* Clear the error */
262
+ rb_set_errinfo(Qnil);
257
263
  }
258
-
264
+
259
265
  rb_define_method(rb_cArray, "group_monotonic", rb_array_group_monotonic, -1);
260
266
  }
@@ -1,3 +1,3 @@
1
1
  module MonotonicGrouper
2
- VERSION = "1.0.2"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -1,4 +1,4 @@
1
- require 'monotonic_grouper/version'
1
+ require_relative 'monotonic_grouper/version'
2
2
 
3
3
  # Load the compiled extension (.so on Linux, .bundle on macOS)
4
4
  begin
@@ -14,37 +14,3 @@ rescue LoadError
14
14
  raise LoadError, "Could not find compiled extension"
15
15
  end
16
16
  end
17
-
18
- module FastBloomFilter
19
- class Filter
20
- def add_all(items)
21
- items.each { |item| add(item.to_s) }
22
- self
23
- end
24
-
25
- def count_possible_matches(items)
26
- items.count { |item| include?(item.to_s) }
27
- end
28
-
29
- def inspect
30
- s = stats
31
- total_kb = (s[:total_bytes] / 1024.0).round(2)
32
- fill_pct = (s[:fill_ratio] * 100).round(2)
33
-
34
- "#<FastBloomFilter::Filter v2 layers=#{s[:num_layers]} " \
35
- "count=#{s[:total_count]} size=#{total_kb}KB fill=#{fill_pct}%>"
36
- end
37
-
38
- def to_s
39
- inspect
40
- end
41
- end
42
-
43
- def self.for_emails(error_rate: 0.001, initial_capacity: 10_000)
44
- Filter.new(error_rate: error_rate, initial_capacity: initial_capacity)
45
- end
46
-
47
- def self.for_urls(error_rate: 0.01, initial_capacity: 10_000)
48
- Filter.new(error_rate: error_rate, initial_capacity: initial_capacity)
49
- end
50
- end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: monotonic_grouper
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roman Hajdarov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-02-13 00:00:00.000000000 Z
11
+ date: 2026-03-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Groups consecutive monotonic sequences in arrays into ranges. Supports
14
14
  any Comparable type with succ method.
@@ -44,7 +44,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
46
  requirements: []
47
- rubygems_version: 3.4.22
47
+ rubygems_version: 3.3.27
48
48
  signing_key:
49
49
  specification_version: 4
50
50
  summary: Fast C extension for grouping monotonic sequences