character-encodings 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. data/README +26 -0
  2. data/Rakefile +157 -0
  3. data/ext/encoding/character/unicode/codepoint.c +48 -0
  4. data/ext/encoding/character/utf-8/break.c +38 -0
  5. data/ext/encoding/character/utf-8/data/break.h +22931 -0
  6. data/ext/encoding/character/utf-8/data/character-tables.h +14356 -0
  7. data/ext/encoding/character/utf-8/data/compose.h +1607 -0
  8. data/ext/encoding/character/utf-8/data/decompose.h +10925 -0
  9. data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +1065 -0
  10. data/ext/encoding/character/utf-8/decompose.c +476 -0
  11. data/ext/encoding/character/utf-8/depend +64 -0
  12. data/ext/encoding/character/utf-8/extconf.rb +47 -0
  13. data/ext/encoding/character/utf-8/private.h +68 -0
  14. data/ext/encoding/character/utf-8/properties.c +1061 -0
  15. data/ext/encoding/character/utf-8/rb_includes.h +18 -0
  16. data/ext/encoding/character/utf-8/rb_methods.h +49 -0
  17. data/ext/encoding/character/utf-8/rb_utf_aref.c +111 -0
  18. data/ext/encoding/character/utf-8/rb_utf_aset.c +105 -0
  19. data/ext/encoding/character/utf-8/rb_utf_casecmp.c +24 -0
  20. data/ext/encoding/character/utf-8/rb_utf_chomp.c +114 -0
  21. data/ext/encoding/character/utf-8/rb_utf_chop.c +44 -0
  22. data/ext/encoding/character/utf-8/rb_utf_collate.c +13 -0
  23. data/ext/encoding/character/utf-8/rb_utf_count.c +30 -0
  24. data/ext/encoding/character/utf-8/rb_utf_delete.c +60 -0
  25. data/ext/encoding/character/utf-8/rb_utf_downcase.c +13 -0
  26. data/ext/encoding/character/utf-8/rb_utf_each_char.c +27 -0
  27. data/ext/encoding/character/utf-8/rb_utf_foldcase.c +13 -0
  28. data/ext/encoding/character/utf-8/rb_utf_hex.c +14 -0
  29. data/ext/encoding/character/utf-8/rb_utf_index.c +50 -0
  30. data/ext/encoding/character/utf-8/rb_utf_insert.c +43 -0
  31. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +331 -0
  32. data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +12 -0
  33. data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +142 -0
  34. data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +41 -0
  35. data/ext/encoding/character/utf-8/rb_utf_justify.c +96 -0
  36. data/ext/encoding/character/utf-8/rb_utf_length.c +14 -0
  37. data/ext/encoding/character/utf-8/rb_utf_lstrip.c +41 -0
  38. data/ext/encoding/character/utf-8/rb_utf_normalize.c +51 -0
  39. data/ext/encoding/character/utf-8/rb_utf_oct.c +14 -0
  40. data/ext/encoding/character/utf-8/rb_utf_reverse.c +13 -0
  41. data/ext/encoding/character/utf-8/rb_utf_rindex.c +88 -0
  42. data/ext/encoding/character/utf-8/rb_utf_rstrip.c +51 -0
  43. data/ext/encoding/character/utf-8/rb_utf_squeeze.c +70 -0
  44. data/ext/encoding/character/utf-8/rb_utf_strip.c +27 -0
  45. data/ext/encoding/character/utf-8/rb_utf_to_i.c +25 -0
  46. data/ext/encoding/character/utf-8/rb_utf_tr.c +250 -0
  47. data/ext/encoding/character/utf-8/rb_utf_upcase.c +13 -0
  48. data/ext/encoding/character/utf-8/unicode.c +319 -0
  49. data/ext/encoding/character/utf-8/unicode.h +208 -0
  50. data/ext/encoding/character/utf-8/utf.c +1332 -0
  51. data/lib/encoding/character/utf-8.rb +201 -0
  52. data/specifications/aref.rb +45 -0
  53. data/specifications/count.rb +29 -0
  54. data/specifications/delete.rb +25 -0
  55. data/specifications/each_char.rb +28 -0
  56. data/specifications/index.rb +35 -0
  57. data/specifications/insert.rb +67 -0
  58. data/specifications/length.rb +45 -0
  59. data/specifications/rindex.rb +52 -0
  60. data/specifications/squeeze.rb +25 -0
  61. data/specifications/to_i.rb +54 -0
  62. data/specifications/tr.rb +39 -0
  63. data/tests/foldcase.rb +28 -0
  64. data/tests/normalize.rb +101 -0
  65. data/tests/unicodedatatestbase.rb +45 -0
  66. metadata +112 -0
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.collate module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_collate(UNUSED(VALUE self), VALUE str, VALUE other)
11
+ {
12
+ return INT2FIX(utf_collate(StringValuePtr(str), StringValuePtr(other)));
13
+ }
@@ -0,0 +1,30 @@
1
+ /*
2
+ * contents: UTF8.count module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_count(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return INT2FIX(0);
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ long count = 0;
24
+ char const *p_end = RSTRING(str)->ptr + RSTRING(str)->len;
25
+ for (char const *p = RSTRING(str)->ptr; p < p_end; p = utf_next(p))
26
+ if (tr_table_lookup(table, _utf_char_validated(p, p_end)))
27
+ count++;
28
+
29
+ return LONG2NUM(count);
30
+ }
@@ -0,0 +1,60 @@
1
+ /*
2
+ * contents: UTF8.delete module functions.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_tr.h"
9
+
10
+ VALUE
11
+ rb_utf_delete_bang(int argc, VALUE *argv, UNUSED(VALUE self))
12
+ {
13
+ need_at_least_n_arguments(argc, 2);
14
+
15
+ VALUE str = argv[0];
16
+ StringValue(str);
17
+ if (RSTRING(str)->len == 0)
18
+ return Qnil;
19
+
20
+ unsigned int table[TR_TABLE_SIZE];
21
+ tr_setup_table_from_strings(table, argc - 1, &argv[1]);
22
+
23
+ rb_str_modify(str);
24
+
25
+ bool modified = false;
26
+ char *s = RSTRING(str)->ptr;
27
+ char const *s_end = s + RSTRING(str)->len;
28
+ char *t = s;
29
+ while (s < s_end) {
30
+ unichar c = utf_char(s);
31
+
32
+ char *next = rb_utf_next_validated(s, s_end);
33
+ if (tr_table_lookup(table, c)) {
34
+ modified = true;
35
+ } else {
36
+ memmove(t, s, next - s);
37
+ t += next - s;
38
+ }
39
+
40
+ s = next;
41
+ }
42
+ *t = '\0';
43
+ RSTRING(str)->len = t - RSTRING(str)->ptr;
44
+
45
+ if (modified)
46
+ return str;
47
+
48
+ return Qnil;
49
+ }
50
+
51
+ VALUE
52
+ rb_utf_delete(int argc, VALUE *argv, VALUE self)
53
+ {
54
+ need_at_least_n_arguments(argc, 2);
55
+
56
+ StringValue(argv[0]);
57
+ argv[0] = rb_utf_dup(argv[0]);
58
+ rb_utf_delete_bang(argc, argv, self);
59
+ return argv[0];
60
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.downcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_downcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_downcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,27 @@
1
+ /*
2
+ * contents: UTF8.each_char module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_each_char(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ #if 0
13
+ RETURN_ENUMERATOR(str, 0, 0);
14
+ #endif
15
+
16
+ const char *s = RSTRING(str)->ptr;
17
+ const char *s_end = s + RSTRING(str)->len;
18
+ while (s < s_end) {
19
+ char buf[MAX_UNICHAR_BYTE_LENGTH];
20
+ int len = unichar_to_utf(_utf_char_validated(s, s_end), buf);
21
+ VALUE c = rb_utf_new(buf, len);
22
+ rb_yield(c);
23
+ s = utf_next(s);
24
+ }
25
+
26
+ return str;
27
+ }
@@ -0,0 +1,13 @@
1
+ /*
2
+ * contents: UTF8.foldcase module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_foldcase(UNUSED(VALUE self), VALUE str)
11
+ {
12
+ return rb_utf_alloc_using(utf_foldcase(StringValuePtr(str)));
13
+ }
@@ -0,0 +1,14 @@
1
+ /*
2
+ * contents: UTF8.hex module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ VALUE
11
+ rb_utf_hex(UNUSED(VALUE self), VALUE str)
12
+ {
13
+ return rb_utf_to_inum(str, 16, false);
14
+ }
@@ -0,0 +1,50 @@
1
+ /*
2
+ * contents: UTF8.index module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ VALUE
10
+ rb_utf_index_m(int argc, VALUE *argv, UNUSED(VALUE self))
11
+ {
12
+ VALUE str, sub, rboffset;
13
+
14
+ long offset = 0;
15
+ if (rb_scan_args(argc, argv, "21", &str, &sub, &rboffset) == 3)
16
+ offset = NUM2LONG(rboffset);
17
+
18
+ StringValue(str);
19
+
20
+ char *begin, *end;
21
+ if (!rb_utf_begin_from_offset(str, offset, &begin, &end)) {
22
+ if (TYPE(sub) == T_REGEXP)
23
+ rb_backref_set(Qnil);
24
+
25
+ return Qnil;
26
+ }
27
+
28
+ switch (TYPE(sub)) {
29
+ case T_REGEXP:
30
+ offset = rb_utf_index_regexp(str, begin, end, sub, offset, false);
31
+ break;
32
+ default: {
33
+ VALUE tmp = rb_check_string_type(sub);
34
+ if (NIL_P(tmp))
35
+ rb_raise(rb_eTypeError, "type mismatch: %s given",
36
+ rb_obj_classname(sub));
37
+
38
+ sub = tmp;
39
+ }
40
+ /* fall through */
41
+ case T_STRING:
42
+ offset = rb_utf_index(str, sub, offset);
43
+ break;
44
+ }
45
+
46
+ if (offset < 0)
47
+ return Qnil;
48
+
49
+ return LONG2NUM(offset);
50
+ }
@@ -0,0 +1,43 @@
1
+ /*
2
+ * contents: UTF8.insert module function.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+
9
+ /* TODO: Update to use new offset-calculating functions. */
10
+ VALUE
11
+ rb_utf_insert(UNUSED(VALUE self), VALUE str, VALUE index, VALUE other)
12
+ {
13
+ long offset = NUM2LONG(index);
14
+
15
+ StringValue(str);
16
+
17
+ long n_chars = utf_length_n(RSTRING(str)->ptr, RSTRING(str)->len);
18
+
19
+ if (abs(offset) > n_chars) {
20
+ if (offset < 0)
21
+ offset -= n_chars;
22
+ rb_raise(rb_eIndexError, "index %ld out of string", offset);
23
+ }
24
+
25
+ long byte_index;
26
+
27
+ if (offset == -1) {
28
+ byte_index = RSTRING(str)->len;
29
+ } else {
30
+ if (offset < 0)
31
+ offset++;
32
+
33
+ char *s = RSTRING(str)->ptr;
34
+
35
+ if (offset < 0)
36
+ s += RSTRING(str)->len;
37
+ byte_index = utf_offset_to_pointer(s, offset) - s;
38
+ }
39
+
40
+ rb_str_update(str, byte_index, 0, other);
41
+
42
+ return str;
43
+ }
@@ -0,0 +1,331 @@
1
+ /*
2
+ * contents: Internal functionality for turning strings into Bignums.
3
+ *
4
+ * Copyright © 2006 Nikolai Weibull <now@bitwi.se>
5
+ */
6
+
7
+ #include "rb_includes.h"
8
+ #include "rb_utf_internal_bignum.h"
9
+
10
+ /* Stolen straight from bignum.c. */
11
+ #define BDIGITS(x) ((BDIGIT *)RBIGNUM(x)->digits)
12
+ #define BITSPERDIG (SIZEOF_BDIGITS * CHAR_BIT)
13
+ #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG)
14
+ #define BIGDN(x) RSHIFT(x, BITSPERDIG)
15
+ #define BIGLO(x) ((BDIGIT)((x) & (BIGRAD - 1)))
16
+
17
+ static VALUE
18
+ bignew_1(VALUE klass, long len, int sign)
19
+ {
20
+ NEWOBJ(big, struct RBignum);
21
+ OBJSETUP(big, klass, T_BIGNUM);
22
+ big->sign = sign ? 1 : 0;
23
+ big->len = len;
24
+ big->digits = ALLOC_N(BDIGIT, len);
25
+
26
+ return (VALUE)big;
27
+ }
28
+
29
+ #define bignew(len, sign) bignew_1(rb_cBignum, len, sign)
30
+
31
/*
 * Consume an optional leading sign.
 *
 * Stores 0 in *SIGN for '-' and 1 otherwise, and returns a pointer to the
 * first byte after the sign (or S itself when there is no sign).
 */
static const char *
rb_utf_to_inum_sign(const char *s, int *sign)
{
        switch (*s) {
        case '-':
                *sign = 0;
                return s + 1;
        case '+':
                *sign = 1;
                return s + 1;
        default:
                *sign = 1;
                return s;
        }
}
44
+
45
/*
 * Determine the base implied by a radix prefix and skip past that prefix.
 *
 * When S starts with '0', *BASE is overwritten according to the following
 * character (x/X: 16, b/B: 2, o/O: 8, d/D: 10, anything else: 8) and the
 * returned pointer skips the two-byte prefix, or just the '0' in the
 * "anything else" case.
 *
 * When S does not start with '0', nothing is skipped; *BASE becomes its own
 * negation if it was below -1 and 10 otherwise.
 */
static const char *
rb_utf_to_inum_base(const char *s, int *base)
{
        if (s[0] != '0') {
                *base = (*base < -1) ? -*base : 10;
                return s;
        }

        const char marker = s[1];

        if (marker == 'x' || marker == 'X') {
                *base = 16;
                return s + 2;
        }
        if (marker == 'b' || marker == 'B') {
                *base = 2;
                return s + 2;
        }
        if (marker == 'o' || marker == 'O') {
                *base = 8;
                return s + 2;
        }
        if (marker == 'd' || marker == 'D') {
                *base = 10;
                return s + 2;
        }

        /* A bare leading zero means octal; only the zero is consumed. */
        *base = 8;
        return s + 1;
}
77
+
78
+ static size_t
79
+ rb_utf_to_inum_base_bit_length(const char *s, int base)
80
+ {
81
+ if (base < 2 || base > 36)
82
+ rb_raise(rb_eArgError, "illegal radix %d", base);
83
+
84
+ size_t bit_length;
85
+ switch (base) {
86
+ case 2:
87
+ bit_length = 1;
88
+ case 3:
89
+ bit_length = 2;
90
+ case 4: case 5: case 6: case 7: case 8:
91
+ bit_length = 3;
92
+ case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
93
+ bit_length = 4;
94
+ default:
95
+ if (base <= 32)
96
+ bit_length = 5;
97
+
98
+ bit_length = 6;
99
+ }
100
+
101
+ return bit_length * utf_length(s);
102
+ }
103
+
104
+ static bool
105
+ rb_utf_to_inum_num_separator(const char *str, const char *s, bool verify,
106
+ unichar c, unichar *non_digit)
107
+ {
108
+ if (c != '_')
109
+ return false;
110
+
111
+ if (!verify)
112
+ return true;
113
+
114
+ if (*non_digit != 0)
115
+ rb_raise(rb_eArgError,
116
+ "unexpected ‘%lc’ found at position %ld", c, s - str);
117
+
118
+ *non_digit = c;
119
+
120
+ return true;
121
+ }
122
+
123
+ static bool
124
+ rb_utf_to_inum_digit_value(const char *str, const char *s, unichar c,
125
+ int base, bool verify, int *digit_value)
126
+ {
127
+ /* If we stumble upon a space, return false so that we may end our
128
+ * processing and skip over any trailing white-space. */
129
+ if (unichar_isspace(c))
130
+ return false;
131
+
132
+ int value = unichar_xdigit_value(c);
133
+ if (value == -1) {
134
+ if (!verify)
135
+ return false;
136
+ rb_raise(rb_eArgError,
137
+ "non-digit character ‘%lc’ found at position %ld",
138
+ c, s - str);
139
+ }
140
+
141
+ if (value >= base) {
142
+ if (!verify)
143
+ return false;
144
+
145
+ rb_raise(rb_eArgError,
146
+ "value (%d) greater than base (%d) at position %ld",
147
+ value, base, s - str);
148
+ }
149
+
150
+ *digit_value = value;
151
+
152
+ return true;
153
+ }
154
+
155
+ static VALUE
156
+ rb_utf_to_inum_as_fix(const char *str, const char *s, int sign, int base,
157
+ bool verify)
158
+ {
159
+ unsigned long value = 0;
160
+
161
+ unichar non_digit = 0;
162
+ while (*s != '\0') {
163
+ unichar c = utf_char(s);
164
+ s = utf_next(s);
165
+
166
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
167
+ continue;
168
+
169
+ int digit_value;
170
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
171
+ break;
172
+ value *= base;
173
+ value += digit_value;
174
+
175
+ non_digit = 0;
176
+ }
177
+
178
+ if (verify) {
179
+ while (*s != '\0' && unichar_isspace(utf_char(s)))
180
+ s = utf_next(s);
181
+ if (*s != '\0')
182
+ rb_raise(rb_eArgError,
183
+ "trailing garbage found at position %ld",
184
+ s - str);
185
+ }
186
+
187
+ if (POSFIXABLE(value)) {
188
+ if (sign)
189
+ return LONG2FIX(value);
190
+ else
191
+ return LONG2FIX(-(long)value);
192
+ }
193
+
194
+ VALUE big = rb_uint2big(value);
195
+ RBIGNUM(big)->sign = sign;
196
+ return rb_big_norm(big);
197
+ }
198
+
199
+ static VALUE
200
+ rb_cutf_to_inum(const char * const str, int base, bool verify)
201
+ {
202
+ /* FIXME: How can this even happen? */
203
+ if (str == NULL) {
204
+ if (verify)
205
+ rb_invalid_str(str, "Integer");
206
+ return INT2FIX(0);
207
+ }
208
+
209
+ const char *s = str;
210
+
211
+ /* Skip any leading whitespace. */
212
+ while (unichar_isspace(utf_char(s)))
213
+ s = utf_next(s);
214
+
215
+ /* Figure out what sign this number uses. */
216
+ int sign;
217
+ s = rb_utf_to_inum_sign(s, &sign);
218
+
219
+ /* Do we have another sign? If so, that’s not correct. */
220
+ if (*s == '+' || *s == '-') {
221
+ if (verify)
222
+ rb_raise(rb_eArgError,
223
+ "extra sign ‘%c’ found at position %ld",
224
+ *s, s - str);
225
+ return INT2FIX(0);
226
+ }
227
+
228
+ int tmp_base = base;
229
+ s = rb_utf_to_inum_base(s, &tmp_base);
230
+ if (base <= 0)
231
+ base = tmp_base;
232
+
233
+ /* Remove preceeding 0s. */
234
+ while (*s == '0')
235
+ s++;
236
+
237
+ /* Figure out how many bits we need to represent the number. */
238
+ size_t bit_length = rb_utf_to_inum_base_bit_length(str, base);
239
+
240
+ /* If the bit_length is less than the number of bits in a VALUE we can
241
+ * try to store it as a FIXNUM. */
242
+ if (bit_length <= sizeof(VALUE) * CHAR_BIT)
243
+ return rb_utf_to_inum_as_fix(str, s, sign, base, verify);
244
+
245
+ if (verify && *str == '_')
246
+ rb_raise(rb_eArgError,
247
+ "leading digit-separator ‘_’ found at position %ld",
248
+ s - str);
249
+
250
+ bit_length = bit_length / BITSPERDIG + 1;
251
+
252
+ /* TODO: Rename these variables. */
253
+ VALUE z = bignew(bit_length, sign);
254
+ BDIGIT *zds = BDIGITS(z);
255
+ MEMZERO(zds, BDIGIT, bit_length);
256
+ int big_len = 1;
257
+
258
+ unichar non_digit = 0;
259
+ while (true) {
260
+ unichar c = utf_char(s);
261
+ s = utf_next(s);
262
+
263
+ if (rb_utf_to_inum_num_separator(str, s, verify, c, &non_digit))
264
+ continue;
265
+
266
+ int digit_value;
267
+ if (!rb_utf_to_inum_digit_value(str, s, c, base, verify, &digit_value))
268
+ break;
269
+
270
+ bool more_to_shift = true;
271
+ while (more_to_shift) {
272
+ BDIGIT_DBL num = c;
273
+
274
+ for (int i = 0; i < big_len; i++) {
275
+ num += (BDIGIT_DBL)zds[i] * base;
276
+ zds[i] = BIGLO(num);
277
+ num = BIGDN(num);
278
+ }
279
+
280
+ more_to_shift = false;
281
+ if (num != 0) {
282
+ big_len++;
283
+ more_to_shift = true;
284
+ }
285
+ }
286
+
287
+ non_digit = 0;
288
+ }
289
+
290
+ if (!verify)
291
+ return rb_big_norm(z);
292
+
293
+ s--;
294
+ if (str + 1 < s && s[-1] == '_')
295
+ rb_raise(rb_eArgError,
296
+ "trailing digit-separator ‘_’ found at position %ld",
297
+ s - str);
298
+
299
+ if (*s != '\0')
300
+ rb_raise(rb_eArgError,
301
+ "trailing garbage found at position %ld",
302
+ s - str);
303
+
304
+ return rb_big_norm(z);
305
+ }
306
+
307
+ VALUE
308
+ rb_utf_to_inum(VALUE str, int base, bool verify)
309
+ {
310
+ StringValue(str);
311
+
312
+ char *s;
313
+ if (verify)
314
+ s = StringValueCStr(str);
315
+ else
316
+ s = RSTRING(str)->ptr;
317
+
318
+ if (s != NULL) {
319
+ long len = RSTRING(str)->len;
320
+ /* no sentinel somehow */
321
+ if (s[len] != '\0') {
322
+ char *p = ALLOCA_N(char, len + 1);
323
+
324
+ MEMCPY(p, s, char, len);
325
+ p[len] = '\0';
326
+ s = p;
327
+ }
328
+ }
329
+
330
+ return rb_cutf_to_inum(s, base, verify);
331
+ }