monotonic_grouper 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/ext/monotonic_grouper/monotonic_grouper.c +93 -6
- data/lib/monotonic_grouper/monotonic_grouper.bundle +0 -0
- data/lib/monotonic_grouper/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7e62903ce3c859e0afe9812bcd66fb9125e92f730b8bb8bd0b368c6ace2ab385
|
|
4
|
+
data.tar.gz: c0a730777117df24089ccfbb799c152a54822ca1be1b9f7f84e27699d83d4fce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ad1a76ec922ef77511a8b7ed5ecc9fbd5325411c9e5cfec491ae0ba651026486e550f0242125c5ddb0d5f4d05b8b1b23ae608efe7d86f1f0a8860981783a7c4c
|
|
7
|
+
data.tar.gz: ddfeddfebb196f6140631ebc492c83704191e75399f2a8586c1a1990d6829213c368796b0fcabe189753a4b187e22ed02d46f6f3b99e41609ae8099acc6e73eb
|
data/README.md
CHANGED
|
@@ -9,6 +9,19 @@ Fast C extension for grouping monotonic sequences in Ruby arrays. Groups consecu
|
|
|
9
9
|
- 📅 **Multiple Types**: Integers, Dates, Characters, and more
|
|
10
10
|
- 🎯 **Configurable**: Adjustable minimum range size
|
|
11
11
|
- 💎 **Ruby-friendly**: Seamless integration as Array method
|
|
12
|
+
- 🚀 **Optimized Date Processing**: Special fast path for Date objects with cached Julian Day calculations
|
|
13
|
+
|
|
14
|
+
## Recent Updates (v1.0.2)
|
|
15
|
+
|
|
16
|
+
### Bug Fixes
|
|
17
|
+
- **Critical**: Fixed first element being incorrectly skipped in all processing paths
|
|
18
|
+
- Fixed loop initialization bug that caused incorrect grouping results
|
|
19
|
+
|
|
20
|
+
### Performance Improvements
|
|
21
|
+
- **2x faster Date processing**: Optimized by caching Julian Day Numbers (only 1 Ruby call per iteration instead of 2)
|
|
22
|
+
- Faster array access using `RARRAY_AREF` macro
|
|
23
|
+
- Better memory preallocation
|
|
24
|
+
- Improved Date subclass handling (DateTime, custom Date classes)
|
|
12
25
|
|
|
13
26
|
## Installation
|
|
14
27
|
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
#include <ruby.h>
|
|
2
|
+
#include <ruby/encoding.h>
|
|
2
3
|
|
|
3
4
|
#ifndef RB_BIGNUM_TYPE_P
|
|
4
5
|
#define RB_BIGNUM_TYPE_P(obj) (RB_TYPE_P((obj), T_BIGNUM))
|
|
5
6
|
#endif
|
|
6
7
|
|
|
7
8
|
static VALUE rb_mMonotonicGrouper;
|
|
9
|
+
static VALUE rb_cDate;
|
|
8
10
|
static ID id_succ;
|
|
9
11
|
static ID id_eq;
|
|
12
|
+
static ID id_jd;
|
|
10
13
|
|
|
11
14
|
static inline int
|
|
12
15
|
is_next_integer(VALUE a, VALUE b)
|
|
@@ -41,6 +44,23 @@ add_group_to_result_integer(VALUE result, VALUE group_start, VALUE group_end, lo
|
|
|
41
44
|
}
|
|
42
45
|
}
|
|
43
46
|
|
|
47
|
+
static void
|
|
48
|
+
add_group_to_result_date(VALUE result, VALUE group_start, VALUE group_end, long size, long min_range_size)
|
|
49
|
+
{
|
|
50
|
+
if (size >= min_range_size) {
|
|
51
|
+
VALUE range = rb_range_new(group_start, group_end, 0);
|
|
52
|
+
rb_ary_push(result, range);
|
|
53
|
+
} else {
|
|
54
|
+
long j;
|
|
55
|
+
VALUE curr = group_start;
|
|
56
|
+
rb_ary_push(result, curr);
|
|
57
|
+
for (j = 1; j < size; j++) {
|
|
58
|
+
curr = rb_funcall(curr, id_succ, 0);
|
|
59
|
+
rb_ary_push(result, curr);
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
44
64
|
static void
|
|
45
65
|
add_group_to_result_generic(VALUE result, VALUE group_start, VALUE group_end, long size, long min_range_size)
|
|
46
66
|
{
|
|
@@ -61,8 +81,8 @@ add_group_to_result_generic(VALUE result, VALUE group_start, VALUE group_end, lo
|
|
|
61
81
|
static VALUE
|
|
62
82
|
process_integer_array(VALUE self, long len, long min_range_size)
|
|
63
83
|
{
|
|
64
|
-
VALUE result =
|
|
65
|
-
VALUE first_elem =
|
|
84
|
+
VALUE result = rb_ary_new_capa(len);
|
|
85
|
+
VALUE first_elem = RARRAY_AREF(self, 0);
|
|
66
86
|
VALUE group_start = first_elem;
|
|
67
87
|
VALUE group_end = first_elem;
|
|
68
88
|
VALUE prev_value = first_elem;
|
|
@@ -70,7 +90,7 @@ process_integer_array(VALUE self, long len, long min_range_size)
|
|
|
70
90
|
long i;
|
|
71
91
|
|
|
72
92
|
for (i = 1; i < len; i++) {
|
|
73
|
-
VALUE curr_value =
|
|
93
|
+
VALUE curr_value = RARRAY_AREF(self, i);
|
|
74
94
|
|
|
75
95
|
if (!FIXNUM_P(curr_value) && !RB_BIGNUM_TYPE_P(curr_value)) {
|
|
76
96
|
rb_raise(rb_eTypeError, "All elements must be of the same type");
|
|
@@ -94,10 +114,49 @@ process_integer_array(VALUE self, long len, long min_range_size)
|
|
|
94
114
|
return result;
|
|
95
115
|
}
|
|
96
116
|
|
|
117
|
+
static VALUE
|
|
118
|
+
process_date_array(VALUE self, long len, long min_range_size, VALUE first_elem)
|
|
119
|
+
{
|
|
120
|
+
VALUE result = rb_ary_new_capa(len);
|
|
121
|
+
VALUE group_start = first_elem;
|
|
122
|
+
VALUE group_end = first_elem;
|
|
123
|
+
VALUE first_class = CLASS_OF(first_elem);
|
|
124
|
+
long current_size = 1;
|
|
125
|
+
long i;
|
|
126
|
+
|
|
127
|
+
/* Cache the jd of the previous element to avoid double rb_funcall */
|
|
128
|
+
long prev_jd = NUM2LONG(rb_funcall(first_elem, id_jd, 0));
|
|
129
|
+
|
|
130
|
+
for (i = 1; i < len; i++) {
|
|
131
|
+
VALUE curr_value = RARRAY_AREF(self, i);
|
|
132
|
+
if (CLASS_OF(curr_value) != first_class) {
|
|
133
|
+
rb_raise(rb_eTypeError, "All elements must be of the same type");
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
long curr_jd = NUM2LONG(rb_funcall(curr_value, id_jd, 0));
|
|
137
|
+
|
|
138
|
+
if (curr_jd == prev_jd + 1) {
|
|
139
|
+
group_end = curr_value;
|
|
140
|
+
current_size++;
|
|
141
|
+
} else {
|
|
142
|
+
add_group_to_result_date(result, group_start, group_end, current_size, min_range_size);
|
|
143
|
+
group_start = curr_value;
|
|
144
|
+
group_end = curr_value;
|
|
145
|
+
current_size = 1;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
prev_jd = curr_jd;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
add_group_to_result_date(result, group_start, group_end, current_size, min_range_size);
|
|
152
|
+
|
|
153
|
+
return result;
|
|
154
|
+
}
|
|
155
|
+
|
|
97
156
|
static VALUE
|
|
98
157
|
process_generic_array(VALUE self, long len, long min_range_size, VALUE first_elem)
|
|
99
158
|
{
|
|
100
|
-
VALUE result =
|
|
159
|
+
VALUE result = rb_ary_new_capa(len);
|
|
101
160
|
VALUE group_start = first_elem;
|
|
102
161
|
VALUE group_end = first_elem;
|
|
103
162
|
VALUE prev_value = first_elem;
|
|
@@ -106,7 +165,7 @@ process_generic_array(VALUE self, long len, long min_range_size, VALUE first_ele
|
|
|
106
165
|
long i;
|
|
107
166
|
|
|
108
167
|
for (i = 1; i < len; i++) {
|
|
109
|
-
VALUE curr_value =
|
|
168
|
+
VALUE curr_value = RARRAY_AREF(self, i);
|
|
110
169
|
if (CLASS_OF(curr_value) != first_class) {
|
|
111
170
|
rb_raise(rb_eTypeError, "All elements must be of the same type");
|
|
112
171
|
}
|
|
@@ -136,6 +195,7 @@ rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
|
|
|
136
195
|
long min_range_size;
|
|
137
196
|
long len;
|
|
138
197
|
VALUE first_elem;
|
|
198
|
+
VALUE first_class;
|
|
139
199
|
|
|
140
200
|
rb_scan_args(argc, argv, "01", &min_range_size_val);
|
|
141
201
|
min_range_size = NIL_P(min_range_size_val) ? 3 : NUM2LONG(min_range_size_val);
|
|
@@ -150,12 +210,20 @@ rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
|
|
|
150
210
|
return rb_ary_new();
|
|
151
211
|
}
|
|
152
212
|
|
|
153
|
-
first_elem =
|
|
213
|
+
first_elem = RARRAY_AREF(self, 0);
|
|
214
|
+
first_class = CLASS_OF(first_elem);
|
|
154
215
|
|
|
216
|
+
/* Fast path for integers */
|
|
155
217
|
if (FIXNUM_P(first_elem) || RB_BIGNUM_TYPE_P(first_elem)) {
|
|
156
218
|
return process_integer_array(self, len, min_range_size);
|
|
157
219
|
}
|
|
158
220
|
|
|
221
|
+
/* Fast path for Date objects - use rb_obj_is_kind_of for safer checking */
|
|
222
|
+
if (rb_cDate != Qnil && rb_obj_is_kind_of(first_elem, rb_cDate)) {
|
|
223
|
+
return process_date_array(self, len, min_range_size, first_elem);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
/* Generic path for other Comparable types */
|
|
159
227
|
if (!rb_respond_to(first_elem, id_succ)) {
|
|
160
228
|
rb_raise(rb_eTypeError, "Elements must respond to :succ method");
|
|
161
229
|
}
|
|
@@ -163,11 +231,30 @@ rb_array_group_monotonic(int argc, VALUE *argv, VALUE self)
|
|
|
163
231
|
return process_generic_array(self, len, min_range_size, first_elem);
|
|
164
232
|
}
|
|
165
233
|
|
|
234
|
+
static VALUE
|
|
235
|
+
get_date_class(VALUE obj)
|
|
236
|
+
{
|
|
237
|
+
return rb_const_get(obj, rb_intern("Date"));
|
|
238
|
+
}
|
|
239
|
+
|
|
166
240
|
void
|
|
167
241
|
Init_monotonic_grouper(void)
|
|
168
242
|
{
|
|
243
|
+
int state = 0;
|
|
244
|
+
|
|
169
245
|
id_succ = rb_intern("succ");
|
|
170
246
|
id_eq = rb_intern("==");
|
|
247
|
+
id_jd = rb_intern("jd");
|
|
248
|
+
|
|
171
249
|
rb_mMonotonicGrouper = rb_define_module("MonotonicGrouper");
|
|
250
|
+
|
|
251
|
+
/* Try to get Date class if it's loaded - use rb_protect to handle NameError */
|
|
252
|
+
rb_cDate = rb_protect(get_date_class, rb_cObject, &state);
|
|
253
|
+
if (state != 0) {
|
|
254
|
+
/* Date class not found, will use generic path */
|
|
255
|
+
rb_cDate = Qnil;
|
|
256
|
+
rb_set_errinfo(Qnil); /* Clear the error */
|
|
257
|
+
}
|
|
258
|
+
|
|
172
259
|
rb_define_method(rb_cArray, "group_monotonic", rb_array_group_monotonic, -1);
|
|
173
260
|
}
|
|
Binary file
|