character-encodings 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/README +26 -0
  2. data/Rakefile +157 -0
  3. data/ext/encoding/character/unicode/codepoint.c +48 -0
  4. data/ext/encoding/character/utf-8/break.c +38 -0
  5. data/ext/encoding/character/utf-8/data/break.h +22931 -0
  6. data/ext/encoding/character/utf-8/data/character-tables.h +14356 -0
  7. data/ext/encoding/character/utf-8/data/compose.h +1607 -0
  8. data/ext/encoding/character/utf-8/data/decompose.h +10925 -0
  9. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +1065 -0
  10. data/ext/encoding/character/utf-8/decompose.c +476 -0
  11. data/ext/encoding/character/utf-8/depend +64 -0
  12. data/ext/encoding/character/utf-8/extconf.rb +47 -0
  13. data/ext/encoding/character/utf-8/private.h +68 -0
  14. data/ext/encoding/character/utf-8/properties.c +1061 -0
  15. data/ext/encoding/character/utf-8/rb_includes.h +18 -0
  16. data/ext/encoding/character/utf-8/rb_methods.h +49 -0
  17. data/ext/encoding/character/utf-8/rb_utf_aref.c +111 -0
  18. data/ext/encoding/character/utf-8/rb_utf_aset.c +105 -0
  19. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +24 -0
  20. data/ext/encoding/character/utf-8/rb_utf_chomp.c +114 -0
  21. data/ext/encoding/character/utf-8/rb_utf_chop.c +44 -0
  22. data/ext/encoding/character/utf-8/rb_utf_collate.c +13 -0
  23. data/ext/encoding/character/utf-8/rb_utf_count.c +30 -0
  24. data/ext/encoding/character/utf-8/rb_utf_delete.c +60 -0
  25. data/ext/encoding/character/utf-8/rb_utf_downcase.c +13 -0
  26. data/ext/encoding/character/utf-8/rb_utf_each_char.c +27 -0
  27. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +13 -0
  28. data/ext/encoding/character/utf-8/rb_utf_hex.c +14 -0
  29. data/ext/encoding/character/utf-8/rb_utf_index.c +50 -0
  30. data/ext/encoding/character/utf-8/rb_utf_insert.c +43 -0
  31. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +331 -0
  32. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +12 -0
  33. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +142 -0
  34. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +41 -0
  35. data/ext/encoding/character/utf-8/rb_utf_justify.c +96 -0
  36. data/ext/encoding/character/utf-8/rb_utf_length.c +14 -0
  37. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +41 -0
  38. data/ext/encoding/character/utf-8/rb_utf_normalize.c +51 -0
  39. data/ext/encoding/character/utf-8/rb_utf_oct.c +14 -0
  40. data/ext/encoding/character/utf-8/rb_utf_reverse.c +13 -0
  41. data/ext/encoding/character/utf-8/rb_utf_rindex.c +88 -0
  42. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +51 -0
  43. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +70 -0
  44. data/ext/encoding/character/utf-8/rb_utf_strip.c +27 -0
  45. data/ext/encoding/character/utf-8/rb_utf_to_i.c +25 -0
  46. data/ext/encoding/character/utf-8/rb_utf_tr.c +250 -0
  47. data/ext/encoding/character/utf-8/rb_utf_upcase.c +13 -0
  48. data/ext/encoding/character/utf-8/unicode.c +319 -0
  49. data/ext/encoding/character/utf-8/unicode.h +208 -0
  50. data/ext/encoding/character/utf-8/utf.c +1332 -0
  51. data/lib/encoding/character/utf-8.rb +201 -0
  52. data/specifications/aref.rb +45 -0
  53. data/specifications/count.rb +29 -0
  54. data/specifications/delete.rb +25 -0
  55. data/specifications/each_char.rb +28 -0
  56. data/specifications/index.rb +35 -0
  57. data/specifications/insert.rb +67 -0
  58. data/specifications/length.rb +45 -0
  59. data/specifications/rindex.rb +52 -0
  60. data/specifications/squeeze.rb +25 -0
  61. data/specifications/to_i.rb +54 -0
  62. data/specifications/tr.rb +39 -0
  63. data/tests/foldcase.rb +28 -0
  64. data/tests/normalize.rb +101 -0
  65. data/tests/unicodedatatestbase.rb +45 -0
  66. metadata +112 -0
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.collate module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_collate(UNUSED(VALUE self), VALUE str, VALUE other)
11
+ {
12
+ return INT2FIX(utf_collate(StringValuePtr(str), StringValuePtr(other)));
13
+ }
@@ -0,0 +1,30 @@
1
+ /*
2
+ * contents: UTF8.count module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_count(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return INT2FIX(0);
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ long count = 0;
24
+ char const *p_end = RSTRING(str)->ptr + RSTRING(str)->len;
25
+ for (char const *p = RSTRING(str)->ptr; p < p_end; p = utf_next(p))
26
+ if (tr_table_lookup(table, _utf_char_validated(p, p_end)))
27
+ count++;
28
+
29
+ return LONG2NUM(count);
30
+ }
@@ -0,0 +1,60 @@
1
+ /*
2
+ * contents: UTF8.delete module functions.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_delete_bang(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return Qnil;
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ rb_str_modify(str);
24
+
25
+ bool modified = false;
26
+ char *s = RSTRING(str)->ptr;
27
+ char const *s_end = s + RSTRING(str)->len;
28
+ char *t = s;
29
+ while (s < s_end) {
30
+ unichar c = utf_char(s);
31
+
32
+ char *next = rb_utf_next_validated(s, s_end);
33
+ if (tr_table_lookup(table, c)) {
34
+ modified = true;
35
+ } else {
36
+ memmove(t, s, next - s);
37
+ t += next - s;
38
+ }
39
+
40
+ s = next;
41
+ }
42
+ *t = '\0';
43
+ RSTRING(str)->len = t - RSTRING(str)->ptr;
44
+
45
+ if (modified)
46
+ return str;
47
+
48
+ return Qnil;
49
+ }
50
+
51
+ VALUE
52
+ rb_utf_delete(int argc, VALUE *argv, VALUE self)
53
+ {
54
+ need_at_least_n_arguments(argc, 2);
55
+
56
+ StringValue(argv[0]);
57
+ argv[0] = rb_utf_dup(argv[0]);
58
+ rb_utf_delete_bang(argc, argv, self);
59
+ return argv[0];
60
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.downcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_downcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_downcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,27 @@
1
+ /*
2
+ * contents: UTF8.each_char module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_each_char(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ #if 0
13
+ RETURN_ENUMERATOR(str, 0, 0);
14
+ #endif
15
+
16
+ const char *s = RSTRING(str)->ptr;
17
+ const char *s_end = s + RSTRING(str)->len;
18
+ while (s < s_end) {
19
+ char buf[MAX_UNICHAR_BYTE_LENGTH];
20
+ int len = unichar_to_utf(_utf_char_validated(s, s_end), buf);
21
+ VALUE c = rb_utf_new(buf, len);
22
+ rb_yield(c);
23
+ s = utf_next(s);
24
+ }
25
+
26
+ return str;
27
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.foldcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_foldcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_foldcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,14 @@
1
+ /*
2
+ * contents: UTF8.hex module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ VALUE
11
+ rb_utf_hex(UNUSED(VALUE self), VALUE str)
12
+ {
13
+ return rb_utf_to_inum(str, 16, false);
14
+ }
@@ -0,0 +1,50 @@
1
+ /*
2
+ * contents: UTF8.index module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_index_m(int argc, VALUE *argv, UNUSED(VALUE self))
11
+ {
12
+ VALUE str, sub, rboffset;
13
+
14
+ long offset = 0;
15
+ if (rb_scan_args(argc, argv, "21", &str, &sub, &rboffset) == 3)
16
+ offset = NUM2LONG(rboffset);
17
+
18
+ StringValue(str);
19
+
20
+ char *begin, *end;
21
+ if (!rb_utf_begin_from_offset(str, offset, &begin, &end)) {
22
+ if (TYPE(sub) == T_REGEXP)
23
+ rb_backref_set(Qnil);
24
+
25
+ return Qnil;
26
+ }
27
+
28
+ switch (TYPE(sub)) {
29
+ case T_REGEXP:
30
+ offset = rb_utf_index_regexp(str, begin, end, sub, offset, false);
31
+ break;
32
+ default: {
33
+ VALUE tmp = rb_check_string_type(sub);
34
+ if (NIL_P(tmp))
35
+ rb_raise(rb_eTypeError, "type mismatch: %s given",
36
+ rb_obj_classname(sub));
37
+
38
+ sub = tmp;
39
+ }
40
+ /* fall through */
41
+ case T_STRING:
42
+ offset = rb_utf_index(str, sub, offset);
43
+ break;
44
+ }
45
+
46
+ if (offset < 0)
47
+ return Qnil;
48
+
49
+ return LONG2NUM(offset);
50
+ }
@@ -0,0 +1,43 @@
1
+ /*
2
+ * contents: UTF8.insert module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ /* TODO: Update to use new offset-calculating functions. */
10
+ VALUE
11
+ rb_utf_insert(UNUSED(VALUE self), VALUE str, VALUE index, VALUE other)
12
+ {
13
+ long offset = NUM2LONG(index);
14
+
15
+ StringValue(str);
16
+
17
+ long n_chars = utf_length_n(RSTRING(str)->ptr, RSTRING(str)->len);
18
+
19
+ if (abs(offset) > n_chars) {
20
+ if (offset < 0)
21
+ offset -= n_chars;
22
+ rb_raise(rb_eIndexError, "index %ld out of string", offset);
23
+ }
24
+
25
+ long byte_index;
26
+
27
+ if (offset == -1) {
28
+ byte_index = RSTRING(str)->len;
29
+ } else {
30
+ if (offset < 0)
31
+ offset++;
32
+
33
+ char *s = RSTRING(str)->ptr;
34
+
35
+ if (offset < 0)
36
+ s += RSTRING(str)->len;
37
+ byte_index = utf_offset_to_pointer(s, offset) - s;
38
+ }
39
+
40
+ rb_str_update(str, byte_index, 0, other);
41
+
42
+ return str;
43
+ }
@@ -0,0 +1,331 @@
1
+ /*
2
+ * contents: Internal functionality for turning strings into Bignums.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ /* Stolen straight from bignum.c.  Helpers for working on the BDIGIT array of a Bignum: BDIGITS(x) is the digit array of x, BITSPERDIG the number of bits in one BDIGIT, BIGRAD the value 1 shifted left by BITSPERDIG (as a BDIGIT_DBL), BIGDN(x) applies RSHIFT to x by BITSPERDIG bits, and BIGLO(x) masks x down to its lowest BDIGIT. */
11
+ #define BDIGITS(x) ((BDIGIT *)RBIGNUM(x)->digits)
12
+ #define BITSPERDIG (SIZEOF_BDIGITS * CHAR_BIT)
13
+ #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG)
14
+ #define BIGDN(x) RSHIFT(x, BITSPERDIG)
15
+ #define BIGLO(x) ((BDIGIT)((x) & (BIGRAD - 1)))
16
+
17
+ static VALUE
18
+ bignew_1(VALUE klass, long len, int sign)
19
+ {
20
+ NEWOBJ(big, struct RBignum);
21
+ OBJSETUP(big, klass, T_BIGNUM);
22
+ big->sign = sign ? 1 : 0;
23
+ big->len = len;
24
+ big->digits = ALLOC_N(BDIGIT, len);
25
+
26
+ return (VALUE)big;
27
+ }
28
+
29
+ #define bignew(len, sign) bignew_1(rb_cBignum, len, sign)
30
+
31
/*
 * Parse an optional leading sign at S.
 *
 * Stores 0 in *SIGN for ‘-’ and 1 otherwise.  Returns a pointer just past
 * the sign if there was one, or S itself if there was not.
 */
static const char *
rb_utf_to_inum_sign(const char *s, int *sign)
{
        switch (*s) {
        case '-':
                *sign = 0;
                return s + 1;
        case '+':
                *sign = 1;
                return s + 1;
        default:
                *sign = 1;
                return s;
        }
}
44
+
45
/*
 * Work out a radix from the text at S and the caller's hint in *BASE.
 *
 * If S starts with ‘0’, the following character selects the radix
 * (x/X: 16, b/B: 2, o/O: 8, d/D: 10, anything else: 8) and the returned
 * pointer skips the two-character prefix, or only the ‘0’ in the
 * "anything else" case.
 *
 * If S does not start with ‘0’, S is returned unchanged and *BASE becomes
 * the negation of the hint when the hint is less than -1, and 10
 * otherwise.
 */
static const char *
rb_utf_to_inum_base(const char *s, int *base)
{
        if (s[0] != '0') {
                *base = (*base < -1) ? -*base : 10;
                return s;
        }

        switch (s[1]) {
        case 'x': case 'X':
                *base = 16;
                return s + 2;
        case 'b': case 'B':
                *base = 2;
                return s + 2;
        case 'o': case 'O':
                *base = 8;
                return s + 2;
        case 'd': case 'D':
                *base = 10;
                return s + 2;
        default:
                *base = 8;
                return s + 1;
        }
}
77
+
78
+ static size_t
79
+ rb_utf_to_inum_base_bit_length(const char *s, int base)
80
+ {
81
+ if (base < 2 || base > 36)
82
+ rb_raise(rb_eArgError, "illegal radix %d", base);
83
+
84
+ size_t bit_length;
85
+ switch (base) {
86
+ case 2:
87
+ bit_length = 1;
88
+ case 3:
89
+ bit_length = 2;
90
+ case 4: case 5: case 6: case 7: case 8:
91
+ bit_length = 3;
92
+ case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
93
+ bit_length = 4;
94
+ default:
95
+ if (base <= 32)
96
+ bit_length = 5;
97
+
98
+ bit_length = 6;
99
+ }
100
+
101
+ return bit_length * utf_length(s);
102
+ }
103
+
104
+ static bool
105
+ rb_utf_to_inum_num_separator(const char *str, const char *s, bool verify,
106
+ unichar c, unichar *non_digit)
107
+ {
108
+ if (c != '_')
109
+ return false;
110
+
111
+ if (!verify)
112
+ return true;
113
+
114
+ if (*non_digit != 0)
115
+ rb_raise(rb_eArgError,
116
+ "unexpected ‘%lc’ found at position %ld", c, s - str);
117
+
118
+ *non_digit = c;
119
+
120
+ return true;
121
+ }
122
+
123
+ static bool
124
+ rb_utf_to_inum_digit_value(const char *str, const char *s, unichar c,
125
+ int base, bool verify, int *digit_value)
126
+ {
127
+ /* If we stumble upon a space, return false so that we may end our
128
+ * processing and skip over any trailing white-space. */
129
+ if (unichar_isspace(c))
130
+ return false;
131
+
132
+ int value = unichar_xdigit_value(c);
133
+ if (value == -1) {
134
+ if (!verify)
135
+ return false;
136
+ rb_raise(rb_eArgError,
137
+ "non-digit character ‘%lc’ found at position %ld",
138
+ c, s - str);
139
+ }
140
+
141
+ if (value >= base) {
142
+ if (!verify)
143
+ return false;
144
+
145
+ rb_raise(rb_eArgError,
146
+ "value (%d) greater than base (%d) at position %ld",
147
+ value, base, s - str);
148
+ }
149
+
150
+ *digit_value = value;
151
+
152
+ return true;
153
+ }
154
+
155
+ static VALUE
156
+ rb_utf_to_inum_as_fix(const char *str, const char *s, int sign, int base,
157
+ bool verify)
158
+ {
159
+ unsigned long value = 0;
160
+
161
+ unichar non_digit = 0;
162
+ while (*s != '\0') {
163
+ unichar c = utf_char(s);
164
+ s = utf_next(s);
165
+
166
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
167
+ continue;
168
+
169
+ int digit_value;
170
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
171
+ break;
172
+ value *= base;
173
+ value += digit_value;
174
+
175
+ non_digit = 0;
176
+ }
177
+
178
+ if (verify) {
179
+ while (*s != '\0' && unichar_isspace(utf_char(s)))
180
+ s = utf_next(s);
181
+ if (*s != '\0')
182
+ rb_raise(rb_eArgError,
183
+ "trailing garbage found at position %ld",
184
+ s - str);
185
+ }
186
+
187
+ if (POSFIXABLE(value)) {
188
+ if (sign)
189
+ return LONG2FIX(value);
190
+ else
191
+ return LONG2FIX(-(long)value);
192
+ }
193
+
194
+ VALUE big = rb_uint2big(value);
195
+ RBIGNUM(big)->sign = sign;
196
+ return rb_big_norm(big);
197
+ }
198
+
199
+ static VALUE
200
+ rb_cutf_to_inum(const char * const str, int base, bool verify)
201
+ {
202
+ /* FIXME: How can this even happen? */
203
+ if (str == NULL) {
204
+ if (verify)
205
+ rb_invalid_str(str, "Integer");
206
+ return INT2FIX(0);
207
+ }
208
+
209
+ const char *s = str;
210
+
211
+ /* Skip any leading whitespace. */
212
+ while (unichar_isspace(utf_char(s)))
213
+ s = utf_next(s);
214
+
215
+ /* Figure out what sign this number uses. */
216
+ int sign;
217
+ s = rb_utf_to_inum_sign(s, &sign);
218
+
219
+ /* Do we have another sign? If so, that’s not correct. */
220
+ if (*s == '+' || *s == '-') {
221
+ if (verify)
222
+ rb_raise(rb_eArgError,
223
+ "extra sign ‘%c’ found at position %ld",
224
+ *s, s - str);
225
+ return INT2FIX(0);
226
+ }
227
+
228
+ int tmp_base = base;
229
+ s = rb_utf_to_inum_base(s, &tmp_base);
230
+ if (base <= 0)
231
+ base = tmp_base;
232
+
233
+ /* Remove preceeding 0s. */
234
+ while (*s == '0')
235
+ s++;
236
+
237
+ /* Figure out how many bits we need to represent the number. */
238
+ size_t bit_length = rb_utf_to_inum_base_bit_length(str, base);
239
+
240
+ /* If the bit_length is less than the number of bits in a VALUE we can
241
+ * try to store it as a FIXNUM. */
242
+ if (bit_length <= sizeof(VALUE) * CHAR_BIT)
243
+ return rb_utf_to_inum_as_fix(str, s, sign, base, verify);
244
+
245
+ if (verify && *str == '_')
246
+ rb_raise(rb_eArgError,
247
+ "leading digit-separator ‘_’ found at position %ld",
248
+ s - str);
249
+
250
+ bit_length = bit_length / BITSPERDIG + 1;
251
+
252
+ /* TODO: Rename these variables. */
253
+ VALUE z = bignew(bit_length, sign);
254
+ BDIGIT *zds = BDIGITS(z);
255
+ MEMZERO(zds, BDIGIT, bit_length);
256
+ int big_len = 1;
257
+
258
+ unichar non_digit = 0;
259
+ while (true) {
260
+ unichar c = utf_char(s);
261
+ s = utf_next(s);
262
+
263
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
264
+ continue;
265
+
266
+ int digit_value;
267
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
268
+ break;
269
+
270
+ bool more_to_shift = true;
271
+ while (more_to_shift) {
272
+ BDIGIT_DBL num = c;
273
+
274
+ for (int i = 0; i < big_len; i++) {
275
+ num += (BDIGIT_DBL)zds[i] * base;
276
+ zds[i] = BIGLO(num);
277
+ num = BIGDN(num);
278
+ }
279
+
280
+ more_to_shift = false;
281
+ if (num != 0) {
282
+ big_len++;
283
+ more_to_shift = true;
284
+ }
285
+ }
286
+
287
+ non_digit = 0;
288
+ }
289
+
290
+ if (!verify)
291
+ return rb_big_norm(z);
292
+
293
+ s--;
294
+ if (str + 1 < s && s[-1] == '_')
295
+ rb_raise(rb_eArgError,
296
+ "trailing digit-separator ‘_’ found at position %ld",
297
+ s - str);
298
+
299
+ if (*s != '\0')
300
+ rb_raise(rb_eArgError,
301
+ "trailing garbage found at position %ld",
302
+ s - str);
303
+
304
+ return rb_big_norm(z);
305
+ }
306
+
307
+ VALUE
308
+ rb_utf_to_inum(VALUE str, int base, bool verify)
309
+ {
310
+ StringValue(str);
311
+
312
+ char *s;
313
+ if (verify)
314
+ s = StringValueCStr(str);
315
+ else
316
+ s = RSTRING(str)->ptr;
317
+
318
+ if (s != NULL) {
319
+ long len = RSTRING(str)->len;
320
+ /* no sentinel somehow */
321
+ if (s[len] != '\0') {
322
+ char *p = ALLOCA_N(char, len + 1);
323
+
324
+ MEMCPY(p, s, char, len);
325
+ p[len] = '\0';
326
+ s = p;
327
+ }
328
+ }
329
+
330
+ return rb_cutf_to_inum(s, base, verify);
331
+ }