u 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/README +38 -0
  2. data/Rakefile +64 -0
  3. data/ext/encoding/character/utf-8/break.c +25 -0
  4. data/ext/encoding/character/utf-8/data/break.h +22931 -0
  5. data/ext/encoding/character/utf-8/data/character-tables.h +14358 -0
  6. data/ext/encoding/character/utf-8/data/compose.h +1607 -0
  7. data/ext/encoding/character/utf-8/data/decompose.h +10926 -0
  8. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +1070 -0
  9. data/ext/encoding/character/utf-8/decompose.c +444 -0
  10. data/ext/encoding/character/utf-8/depend +65 -0
  11. data/ext/encoding/character/utf-8/extconf.rb +67 -0
  12. data/ext/encoding/character/utf-8/private.c +62 -0
  13. data/ext/encoding/character/utf-8/private.h +51 -0
  14. data/ext/encoding/character/utf-8/properties.c +1056 -0
  15. data/ext/encoding/character/utf-8/rb_includes.h +19 -0
  16. data/ext/encoding/character/utf-8/rb_methods.h +49 -0
  17. data/ext/encoding/character/utf-8/rb_private.h +52 -0
  18. data/ext/encoding/character/utf-8/rb_utf_aref.c +111 -0
  19. data/ext/encoding/character/utf-8/rb_utf_aset.c +105 -0
  20. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +24 -0
  21. data/ext/encoding/character/utf-8/rb_utf_chomp.c +114 -0
  22. data/ext/encoding/character/utf-8/rb_utf_chop.c +44 -0
  23. data/ext/encoding/character/utf-8/rb_utf_collate.c +13 -0
  24. data/ext/encoding/character/utf-8/rb_utf_count.c +30 -0
  25. data/ext/encoding/character/utf-8/rb_utf_delete.c +60 -0
  26. data/ext/encoding/character/utf-8/rb_utf_downcase.c +13 -0
  27. data/ext/encoding/character/utf-8/rb_utf_each_char.c +27 -0
  28. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +13 -0
  29. data/ext/encoding/character/utf-8/rb_utf_hex.c +14 -0
  30. data/ext/encoding/character/utf-8/rb_utf_index.c +50 -0
  31. data/ext/encoding/character/utf-8/rb_utf_insert.c +48 -0
  32. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +332 -0
  33. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +12 -0
  34. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +142 -0
  35. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +41 -0
  36. data/ext/encoding/character/utf-8/rb_utf_justify.c +96 -0
  37. data/ext/encoding/character/utf-8/rb_utf_length.c +14 -0
  38. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +41 -0
  39. data/ext/encoding/character/utf-8/rb_utf_normalize.c +51 -0
  40. data/ext/encoding/character/utf-8/rb_utf_oct.c +14 -0
  41. data/ext/encoding/character/utf-8/rb_utf_reverse.c +13 -0
  42. data/ext/encoding/character/utf-8/rb_utf_rindex.c +88 -0
  43. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +51 -0
  44. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +70 -0
  45. data/ext/encoding/character/utf-8/rb_utf_strip.c +27 -0
  46. data/ext/encoding/character/utf-8/rb_utf_to_i.c +25 -0
  47. data/ext/encoding/character/utf-8/rb_utf_tr.c +250 -0
  48. data/ext/encoding/character/utf-8/rb_utf_upcase.c +13 -0
  49. data/ext/encoding/character/utf-8/tables.h +38 -0
  50. data/ext/encoding/character/utf-8/unicode.c +319 -0
  51. data/ext/encoding/character/utf-8/unicode.h +216 -0
  52. data/ext/encoding/character/utf-8/utf.c +1334 -0
  53. data/lib/encoding/character/utf-8.rb +201 -0
  54. data/lib/u.rb +16 -0
  55. data/lib/u/string.rb +185 -0
  56. data/lib/u/version.rb +5 -0
  57. data/test/unit/u.rb +5 -0
  58. data/test/unit/u/string.rb +91 -0
  59. metadata +174 -0
@@ -0,0 +1,44 @@
1
+ /*
2
+ * contents: UTF8.chop module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_chop_bang(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ StringValue(str);
13
+
14
+ if (RSTRING(str)->len == 0)
15
+ return Qnil;
16
+
17
+ rb_str_modify(str);
18
+
19
+ const char *end = RSTRING(str)->ptr + RSTRING(str)->len;
20
+
21
+ char *last = rb_utf_prev_validated(RSTRING(str)->ptr, end);
22
+
23
+ if (_utf_char_validated(last, end) == '\n') {
24
+ char *last_but_one = utf_find_prev(RSTRING(str)->ptr, last);
25
+
26
+ if (last_but_one != NULL && utf_char(last_but_one) == '\r')
27
+ last = last_but_one;
28
+ } else if (!unichar_isnewline(utf_char(last))) {
29
+ return Qnil;
30
+ }
31
+
32
+ RSTRING(str)->len -= (RSTRING(str)->ptr + RSTRING(str)->len) - last;
33
+ *last = '\0';
34
+
35
+ return str;
36
+ }
37
+
38
+ VALUE
39
+ rb_utf_chop(VALUE self, VALUE str)
40
+ {
41
+ str = rb_utf_dup(str);
42
+ rb_utf_chop_bang(self, str);
43
+ return str;
44
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.collate module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_collate(UNUSED(VALUE self), VALUE str, VALUE other)
11
+ {
12
+ return INT2FIX(utf_collate(StringValuePtr(str), StringValuePtr(other)));
13
+ }
@@ -0,0 +1,30 @@
1
+ /*
2
+ * contents: UTF8.count module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_count(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return INT2FIX(0);
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ long count = 0;
24
+ char const *p_end = RSTRING(str)->ptr + RSTRING(str)->len;
25
+ for (char const *p = RSTRING(str)->ptr; p < p_end; p = utf_next(p))
26
+ if (tr_table_lookup(table, _utf_char_validated(p, p_end)))
27
+ count++;
28
+
29
+ return LONG2NUM(count);
30
+ }
@@ -0,0 +1,60 @@
1
+ /*
2
+ * contents: UTF8.delete module functions.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_delete_bang(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return Qnil;
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ rb_str_modify(str);
24
+
25
+ bool modified = false;
26
+ char *s = RSTRING(str)->ptr;
27
+ char const *s_end = s + RSTRING(str)->len;
28
+ char *t = s;
29
+ while (s < s_end) {
30
+ unichar c = utf_char(s);
31
+
32
+ char *next = rb_utf_next_validated(s, s_end);
33
+ if (tr_table_lookup(table, c)) {
34
+ modified = true;
35
+ } else {
36
+ memmove(t, s, next - s);
37
+ t += next - s;
38
+ }
39
+
40
+ s = next;
41
+ }
42
+ *t = '\0';
43
+ RSTRING(str)->len = t - RSTRING(str)->ptr;
44
+
45
+ if (modified)
46
+ return str;
47
+
48
+ return Qnil;
49
+ }
50
+
51
+ VALUE
52
+ rb_utf_delete(int argc, VALUE *argv, VALUE self)
53
+ {
54
+ need_at_least_n_arguments(argc, 2);
55
+
56
+ StringValue(argv[0]);
57
+ argv[0] = rb_utf_dup(argv[0]);
58
+ rb_utf_delete_bang(argc, argv, self);
59
+ return argv[0];
60
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.downcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_downcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_downcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,27 @@
1
+ /*
2
+ * contents: UTF8.each_char module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_each_char(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ #if 0
13
+ RETURN_ENUMERATOR(str, 0, 0);
14
+ #endif
15
+
16
+ const char *s = RSTRING(str)->ptr;
17
+ const char *s_end = s + RSTRING(str)->len;
18
+ while (s < s_end) {
19
+ char buf[MAX_UNICHAR_BYTE_LENGTH];
20
+ int len = unichar_to_utf(_utf_char_validated(s, s_end), buf);
21
+ VALUE c = rb_utf_new(buf, len);
22
+ rb_yield(c);
23
+ s = utf_next(s);
24
+ }
25
+
26
+ return str;
27
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.foldcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_foldcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_foldcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,14 @@
1
+ /*
2
+ * contents: UTF8.hex module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ VALUE
11
+ rb_utf_hex(UNUSED(VALUE self), VALUE str)
12
+ {
13
+ return rb_utf_to_inum(str, 16, false);
14
+ }
@@ -0,0 +1,50 @@
1
+ /*
2
+ * contents: UTF8.index module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_index_m(int argc, VALUE *argv, UNUSED(VALUE self))
11
+ {
12
+ VALUE str, sub, rboffset;
13
+
14
+ long offset = 0;
15
+ if (rb_scan_args(argc, argv, "21", &str, &sub, &rboffset) == 3)
16
+ offset = NUM2LONG(rboffset);
17
+
18
+ StringValue(str);
19
+
20
+ char *begin, *end;
21
+ if (!rb_utf_begin_from_offset(str, offset, &begin, &end)) {
22
+ if (TYPE(sub) == T_REGEXP)
23
+ rb_backref_set(Qnil);
24
+
25
+ return Qnil;
26
+ }
27
+
28
+ switch (TYPE(sub)) {
29
+ case T_REGEXP:
30
+ offset = rb_utf_index_regexp(str, begin, end, sub, offset, false);
31
+ break;
32
+ default: {
33
+ VALUE tmp = rb_check_string_type(sub);
34
+ if (NIL_P(tmp))
35
+ rb_raise(rb_eTypeError, "type mismatch: %s given",
36
+ rb_obj_classname(sub));
37
+
38
+ sub = tmp;
39
+ }
40
+ /* fall through */
41
+ case T_STRING:
42
+ offset = rb_utf_index(str, sub, offset);
43
+ break;
44
+ }
45
+
46
+ if (offset < 0)
47
+ return Qnil;
48
+
49
+ return LONG2NUM(offset);
50
+ }
@@ -0,0 +1,48 @@
1
+ /*
2
+ * contents: UTF8.insert module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_insert(UNUSED(VALUE self), VALUE str, VALUE index, VALUE other)
11
+ {
12
+ long offset = NUM2LONG(index);
13
+ StringValue(str);
14
+
15
+ long n_chars = utf_length_n(RSTRING(str)->ptr, RSTRING(str)->len);
16
+ /*
17
+ char *begin, *end;
18
+ if (offset < 0)
19
+ offset++;
20
+ rb_utf_begin_from_offset_validated(str, offset, &begin, &end);
21
+ puts(begin);
22
+ rb_str_update(str, begin - RSTRING(str)->ptr, 0, other);
23
+ */
24
+ if (abs(offset) > n_chars) {
25
+ if (offset < 0)
26
+ offset += n_chars;
27
+ rb_raise(rb_eIndexError, "index %ld out of string", offset);
28
+ }
29
+
30
+ long byte_index;
31
+
32
+ if (offset == -1) {
33
+ byte_index = RSTRING(str)->len;
34
+ } else {
35
+ if (offset < 0)
36
+ offset++;
37
+
38
+ char *s = RSTRING(str)->ptr;
39
+
40
+ if (offset < 0)
41
+ s += RSTRING(str)->len;
42
+ byte_index = utf_offset_to_pointer(s, offset) - s;
43
+ }
44
+
45
+ rb_str_update(str, byte_index, 0, other);
46
+
47
+ return str;
48
+ }
@@ -0,0 +1,332 @@
1
+ /*
2
+ * contents: Internal functionality for turning strings into Bignums.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ /* Stolen straight from bignum.c. */
11
+ #define BDIGITS(x) ((BDIGIT *)RBIGNUM(x)->digits)
12
+ #define BITSPERDIG (SIZEOF_BDIGITS * CHAR_BIT)
13
+ #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG)
14
+ #define BIGDN(x) RSHIFT(x, BITSPERDIG)
15
+ #define BIGLO(x) ((BDIGIT)((x) & (BIGRAD - 1)))
16
+
17
+ static VALUE
18
+ bignew_1(VALUE klass, long len, int sign)
19
+ {
20
+ NEWOBJ(big, struct RBignum);
21
+ OBJSETUP(big, klass, T_BIGNUM);
22
+ big->sign = sign ? 1 : 0;
23
+ big->len = len;
24
+ big->digits = ALLOC_N(BDIGIT, len);
25
+
26
+ return (VALUE)big;
27
+ }
28
+
29
+ #define bignew(len, sign) bignew_1(rb_cBignum, len, sign)
30
+
31
/*
 * Consume an optional leading sign character at S.
 *
 * Stores 0 in *SIGN for '-', and 1 otherwise.  Returns a pointer just past
 * the sign character when one was present, or S itself when not.
 */
static const char *
rb_utf_to_inum_sign(const char *s, int *sign)
{
        switch (*s) {
        case '-':
                *sign = 0;
                return s + 1;
        case '+':
                *sign = 1;
                return s + 1;
        default:
                *sign = 1;
                return s;
        }
}
44
+
45
/*
 * Determine the radix implied by the text at S and skip any radix prefix.
 *
 * When S starts with '0', the following character selects the radix
 * (x/X: 16, b/B: 2, o/O: 8, d/D: 10) and both characters are skipped; any
 * other follower means octal and only the '0' is skipped.
 *
 * When S does not start with '0', nothing is skipped: a *BASE below -1 is
 * negated, and every other value of *BASE is replaced by 10.
 *
 * Returns the position at which the digits begin.
 */
static const char *
rb_utf_to_inum_base(const char *s, int *base)
{
        if (s[0] != '0') {
                *base = (*base < -1) ? -*base : 10;
                return s;
        }

        int radix;
        switch (s[1]) {
        case 'x': case 'X':
                radix = 16;
                break;
        case 'b': case 'B':
                radix = 2;
                break;
        case 'o': case 'O':
                radix = 8;
                break;
        case 'd': case 'D':
                radix = 10;
                break;
        default:
                /* A lone leading zero: octal, skip just the zero. */
                *base = 8;
                return s + 1;
        }

        *base = radix;
        return s + 2;
}
77
+
78
+ static size_t
79
+ rb_utf_to_inum_base_bit_length(const char *s, int base)
80
+ {
81
+ if (base < 2 || base > 36)
82
+ rb_raise(rb_eArgError, "illegal radix %d", base);
83
+
84
+ size_t bit_length;
85
+ switch (base) {
86
+ case 2:
87
+ bit_length = 1;
88
+ case 3:
89
+ bit_length = 2;
90
+ case 4: case 5: case 6: case 7: case 8:
91
+ bit_length = 3;
92
+ case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
93
+ bit_length = 4;
94
+ default:
95
+ if (base <= 32)
96
+ bit_length = 5;
97
+
98
+ bit_length = 6;
99
+ }
100
+
101
+ return bit_length * utf_length(s);
102
+ }
103
+
104
+ static bool
105
+ rb_utf_to_inum_num_separator(const char *str, const char *s, bool verify,
106
+ unichar c, unichar *non_digit)
107
+ {
108
+ if (c != '_')
109
+ return false;
110
+
111
+ if (!verify)
112
+ return true;
113
+
114
+ if (*non_digit != 0)
115
+ rb_raise(rb_eArgError,
116
+ "unexpected ‘%lc’ found at position %ld",
117
+ c, utf_pointer_to_offset(str, s));
118
+
119
+ *non_digit = c;
120
+
121
+ return true;
122
+ }
123
+
124
+ static bool
125
+ rb_utf_to_inum_digit_value(const char *str, const char *s, unichar c,
126
+ int base, bool verify, int *digit_value)
127
+ {
128
+ /* If we stumble upon a space, return false so that we may end our
129
+ * processing and skip over any trailing white-space. */
130
+ if (unichar_isspace(c))
131
+ return false;
132
+
133
+ int value = unichar_xdigit_value(c);
134
+ if (value == -1) {
135
+ if (!verify)
136
+ return false;
137
+ rb_raise(rb_eArgError,
138
+ "non-digit character ‘%lc’ found at position %ld",
139
+ c, utf_pointer_to_offset(str, s));
140
+ }
141
+
142
+ if (value >= base) {
143
+ if (!verify)
144
+ return false;
145
+
146
+ rb_raise(rb_eArgError,
147
+ "value (%d) greater than base (%d) at position %ld",
148
+ value, base, utf_pointer_to_offset(str, s));
149
+ }
150
+
151
+ *digit_value = value;
152
+
153
+ return true;
154
+ }
155
+
156
+ static VALUE
157
+ rb_utf_to_inum_as_fix(const char *str, const char *s, int sign, int base,
158
+ bool verify)
159
+ {
160
+ unsigned long value = 0;
161
+
162
+ unichar non_digit = 0;
163
+ while (*s != '\0') {
164
+ unichar c = utf_char(s);
165
+ s = utf_next(s);
166
+
167
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
168
+ continue;
169
+
170
+ int digit_value;
171
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
172
+ break;
173
+ value *= base;
174
+ value += digit_value;
175
+
176
+ non_digit = 0;
177
+ }
178
+
179
+ if (verify) {
180
+ while (*s != '\0' && unichar_isspace(utf_char(s)))
181
+ s = utf_next(s);
182
+ if (*s != '\0')
183
+ rb_raise(rb_eArgError,
184
+ "trailing garbage found at position %ld",
185
+ utf_pointer_to_offset(str, s));
186
+ }
187
+
188
+ if (POSFIXABLE(value)) {
189
+ if (sign)
190
+ return LONG2FIX(value);
191
+ else
192
+ return LONG2FIX(-(long)value);
193
+ }
194
+
195
+ VALUE big = rb_uint2big(value);
196
+ RBIGNUM(big)->sign = sign;
197
+ return rb_big_norm(big);
198
+ }
199
+
200
+ static VALUE
201
+ rb_cutf_to_inum(const char * const str, int base, bool verify)
202
+ {
203
+ /* FIXME: How can this even happen? */
204
+ if (str == NULL) {
205
+ if (verify)
206
+ rb_invalid_str(str, "Integer");
207
+ return INT2FIX(0);
208
+ }
209
+
210
+ const char *s = str;
211
+
212
+ /* Skip any leading whitespace. */
213
+ while (unichar_isspace(utf_char(s)))
214
+ s = utf_next(s);
215
+
216
+ /* Figure out what sign this number uses. */
217
+ int sign;
218
+ s = rb_utf_to_inum_sign(s, &sign);
219
+
220
+ /* Do we have another sign? If so, that’s not correct. */
221
+ if (*s == '+' || *s == '-') {
222
+ if (verify)
223
+ rb_raise(rb_eArgError,
224
+ "extra sign ‘%c’ found at position %ld",
225
+ *s, utf_pointer_to_offset(str, s));
226
+ return INT2FIX(0);
227
+ }
228
+
229
+ int tmp_base = base;
230
+ s = rb_utf_to_inum_base(s, &tmp_base);
231
+ if (base <= 0)
232
+ base = tmp_base;
233
+
234
+ /* Remove preceeding 0s. */
235
+ while (*s == '0')
236
+ s++;
237
+
238
+ /* Figure out how many bits we need to represent the number. */
239
+ size_t bit_length = rb_utf_to_inum_base_bit_length(str, base);
240
+
241
+ /* If the bit_length is less than the number of bits in a VALUE we can
242
+ * try to store it as a FIXNUM. */
243
+ if (bit_length <= sizeof(VALUE) * CHAR_BIT)
244
+ return rb_utf_to_inum_as_fix(str, s, sign, base, verify);
245
+
246
+ if (verify && *str == '_')
247
+ rb_raise(rb_eArgError,
248
+ "leading digit-separator ‘_’ found at position %ld",
249
+ utf_pointer_to_offset(str, s));
250
+
251
+ bit_length = bit_length / BITSPERDIG + 1;
252
+
253
+ /* TODO: Rename these variables. */
254
+ VALUE z = bignew(bit_length, sign);
255
+ BDIGIT *zds = BDIGITS(z);
256
+ MEMZERO(zds, BDIGIT, bit_length);
257
+ int big_len = 1;
258
+
259
+ unichar non_digit = 0;
260
+ while (true) {
261
+ unichar c = utf_char(s);
262
+ s = utf_next(s);
263
+
264
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
265
+ continue;
266
+
267
+ int digit_value;
268
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
269
+ break;
270
+
271
+ bool more_to_shift = true;
272
+ while (more_to_shift) {
273
+ BDIGIT_DBL num = digit_value;
274
+
275
+ for (int i = 0; i < big_len; i++) {
276
+ num += (BDIGIT_DBL)zds[i] * base;
277
+ zds[i] = BIGLO(num);
278
+ num = BIGDN(num);
279
+ }
280
+
281
+ more_to_shift = false;
282
+ if (num != 0) {
283
+ big_len++;
284
+ more_to_shift = true;
285
+ }
286
+ }
287
+
288
+ non_digit = 0;
289
+ }
290
+
291
+ if (!verify)
292
+ return rb_big_norm(z);
293
+
294
+ s--;
295
+ if (str + 1 < s && s[-1] == '_')
296
+ rb_raise(rb_eArgError,
297
+ "trailing digit-separator ‘_’ found at position %ld",
298
+ utf_pointer_to_offset(str, s));
299
+
300
+ if (*s != '\0')
301
+ rb_raise(rb_eArgError,
302
+ "trailing garbage found at position %ld",
303
+ utf_pointer_to_offset(str, s));
304
+
305
+ return rb_big_norm(z);
306
+ }
307
+
308
+ VALUE
309
+ rb_utf_to_inum(VALUE str, int base, bool verify)
310
+ {
311
+ StringValue(str);
312
+
313
+ char *s;
314
+ if (verify)
315
+ s = StringValueCStr(str);
316
+ else
317
+ s = RSTRING(str)->ptr;
318
+
319
+ if (s != NULL) {
320
+ long len = RSTRING(str)->len;
321
+ /* no sentinel somehow */
322
+ if (s[len] != '\0') {
323
+ char *p = ALLOCA_N(char, len + 1);
324
+
325
+ MEMCPY(p, s, char, len);
326
+ p[len] = '\0';
327
+ s = p;
328
+ }
329
+ }
330
+
331
+ return rb_cutf_to_inum(s, base, verify);
332
+ }