RubyGems - u - Versions diffs - 0.5.0 → 1.0.0 - Mend

u 0.5.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (316) hide show

checksums.yaml +7 -0
data/build/ext/u/data/attributes.rb +39 -0
data/build/ext/u/data/bidi-mirroring.rb +27 -0
data/build/ext/u/data/canonical-combining-class.rb +15 -0
data/build/ext/u/data/case-folding.rb +39 -0
data/build/ext/u/data/cased.rb +19 -0
data/build/ext/u/data/compose.rb +304 -0
data/build/ext/u/data/constants.rb +31 -0
data/build/ext/u/data/decompose.rb +85 -0
data/build/ext/u/data/general-category.rb +61 -0
data/build/ext/u/data/grapheme-word-break.rb +15 -0
data/build/ext/u/data/marshalled.rb +5 -0
data/build/ext/u/data/script.rb +91 -0
data/build/ext/u/data/soft-dotted.rb +17 -0
data/build/ext/u/data/title-table.rb +30 -0
data/build/ext/u/data/wide.rb +17 -0
data/build/lib/u/build.rb +8 -0
data/build/lib/u/build/data.rb +16 -0
data/build/lib/u/build/data/bidimirroring.rb +26 -0
data/build/lib/u/build/data/break.rb +14 -0
data/build/lib/u/build/data/casefolding.rb +77 -0
data/build/lib/u/build/data/compositionexclusions.rb +14 -0
data/build/lib/u/build/data/derivedeastasianwidth.rb +15 -0
data/build/lib/u/build/data/file.rb +88 -0
data/build/lib/u/build/data/linebreak.rb +14 -0
data/build/lib/u/build/data/proplist.rb +18 -0
data/build/lib/u/build/data/scripts.rb +22 -0
data/build/lib/u/build/data/specialcasing.rb +106 -0
data/build/lib/u/build/data/unicode.rb +41 -0
data/build/lib/u/build/data/unicode/entry.rb +27 -0
data/build/lib/u/build/data/unicode/entry/decomposition.rb +29 -0
data/build/lib/u/build/data/unicode/points.rb +32 -0
data/build/lib/u/build/header.rb +11 -0
data/build/lib/u/build/header/table.rb +19 -0
data/build/lib/u/build/header/table/row.rb +64 -0
data/build/lib/u/build/header/tables.rb +6 -0
data/build/lib/u/build/header/tables/intervals.rb +50 -0
data/build/lib/u/build/header/tables/split.rb +20 -0
data/build/lib/u/build/header/tables/split/data.rb +29 -0
data/build/lib/u/build/header/tables/split/part1.rb +28 -0
data/build/lib/u/build/header/tables/split/part2.rb +13 -0
data/build/lib/u/build/header/tables/split/row.rb +34 -0
data/build/lib/u/build/header/tables/split/rows.rb +22 -0
data/build/test/unit/break.rb +45 -0
data/build/test/unit/case.rb +178 -0
data/build/test/unit/foldcase.rb +44 -0
data/build/test/unit/normalize.rb +81 -0
data/ext/u/attributes.c +62 -0
data/ext/u/attributes.h +5 -0
data/ext/u/case.h +41 -0
data/ext/u/data/attributes.h +3070 -0
data/ext/u/data/bidi-mirroring.h +373 -0
data/ext/u/data/canonical-combining-class.h +2157 -0
data/ext/u/data/case-folding.h +171 -0
data/ext/u/data/cased.h +42 -0
data/ext/u/data/compose.h +1714 -0
data/ext/u/data/constants.h +17 -0
data/ext/u/data/decompose.h +9356 -0
data/ext/u/data/general-category.h +28959 -0
data/ext/u/data/grapheme-break.h +13201 -0
data/ext/u/data/line-break.h +26501 -0
data/ext/u/data/normalization-quick-check.h +3002 -0
data/ext/u/data/script.h +2928 -0
data/ext/u/data/soft-dotted.h +55 -0
data/ext/u/data/title-table.h +41 -0
data/ext/u/data/types.h +11117 -0
data/ext/u/data/wide-cjk.h +197 -0
data/ext/u/data/wide.h +59 -0
data/ext/u/data/word-break.h +10001 -0
data/ext/u/depend +281 -0
data/ext/u/extconf.rb +158 -0
data/ext/u/output.h +51 -0
data/ext/{encoding/character/utf-8 → u}/private.c +11 -15
data/ext/u/private.h +58 -0
data/ext/u/rb_includes.h +10 -0
data/ext/u/rb_private.c +98 -0
data/ext/u/rb_private.h +67 -0
data/ext/u/rb_u.c +251 -0
data/ext/u/rb_u_buffer.c +443 -0
data/ext/u/rb_u_buffer.h +24 -0
data/ext/u/rb_u_re.c +43 -0
data/ext/u/rb_u_re.h +15 -0
data/ext/u/rb_u_string.c +478 -0
data/ext/u/rb_u_string.h +173 -0
data/ext/u/rb_u_string_alnum.c +10 -0
data/ext/u/rb_u_string_alpha.c +10 -0
data/ext/u/rb_u_string_aref.c +142 -0
data/ext/u/rb_u_string_ascii_only.c +13 -0
data/ext/u/rb_u_string_assigned.c +10 -0
data/ext/u/rb_u_string_b.c +18 -0
data/ext/u/rb_u_string_bytesize.c +10 -0
data/ext/u/rb_u_string_byteslice.c +103 -0
data/ext/u/rb_u_string_canonical_combining_class.c +33 -0
data/ext/u/rb_u_string_case_ignorable.c +25 -0
data/ext/u/rb_u_string_casecmp.c +61 -0
data/ext/u/rb_u_string_cased.c +17 -0
data/ext/u/rb_u_string_chomp.c +107 -0
data/ext/u/rb_u_string_chop.c +33 -0
data/ext/u/rb_u_string_chr.c +9 -0
data/ext/u/rb_u_string_cntrl.c +10 -0
data/ext/u/rb_u_string_collate.c +46 -0
data/ext/u/rb_u_string_collation_key.c +18 -0
data/ext/u/rb_u_string_count.c +38 -0
data/ext/u/rb_u_string_defined.c +10 -0
data/ext/u/rb_u_string_delete.c +62 -0
data/ext/u/rb_u_string_digit.c +10 -0
data/ext/u/rb_u_string_downcase.c +13 -0
data/ext/u/rb_u_string_dump.c +153 -0
data/ext/u/rb_u_string_each_byte.c +46 -0
data/ext/u/rb_u_string_each_char.c +49 -0
data/ext/u/rb_u_string_each_codepoint.c +45 -0
data/ext/u/rb_u_string_each_grapheme_cluster.c +36 -0
data/ext/u/rb_u_string_each_line.c +142 -0
data/ext/u/rb_u_string_each_word.c +34 -0
data/ext/u/rb_u_string_empty.c +11 -0
data/ext/u/rb_u_string_end_with.c +31 -0
data/ext/u/rb_u_string_eql.c +30 -0
data/ext/u/rb_u_string_equal.c +33 -0
data/ext/u/rb_u_string_foldcase.c +12 -0
data/ext/u/rb_u_string_folded.c +13 -0
data/ext/u/rb_u_string_format.c +1745 -0
data/ext/u/rb_u_string_general_category.c +109 -0
data/ext/u/rb_u_string_getbyte.c +21 -0
data/ext/u/rb_u_string_graph.c +21 -0
data/ext/u/rb_u_string_grapheme_break.c +61 -0
data/ext/u/rb_u_string_gsub.c +164 -0
data/ext/u/rb_u_string_hash.c +10 -0
data/ext/u/rb_u_string_hex.c +9 -0
data/ext/u/rb_u_string_include.c +10 -0
data/ext/u/rb_u_string_index.c +110 -0
data/ext/u/rb_u_string_inspect.c +189 -0
data/ext/u/rb_u_string_internal_tr.c +148 -0
data/ext/u/rb_u_string_internal_tr.h +29 -0
data/ext/u/rb_u_string_justify.c +169 -0
data/ext/u/rb_u_string_length.c +10 -0
data/ext/u/rb_u_string_line_break.c +115 -0
data/ext/u/rb_u_string_lower.c +13 -0
data/ext/u/rb_u_string_lstrip.c +24 -0
data/ext/u/rb_u_string_match.c +65 -0
data/ext/u/rb_u_string_mirror.c +16 -0
data/ext/u/rb_u_string_newline.c +21 -0
data/ext/u/rb_u_string_normalize.c +70 -0
data/ext/u/rb_u_string_normalized.c +28 -0
data/ext/u/rb_u_string_oct.c +11 -0
data/ext/u/rb_u_string_ord.c +14 -0
data/ext/u/rb_u_string_partition.c +80 -0
data/ext/u/rb_u_string_plus.c +33 -0
data/ext/u/rb_u_string_print.c +10 -0
data/ext/u/rb_u_string_punct.c +10 -0
data/ext/u/rb_u_string_reverse.c +13 -0
data/ext/u/rb_u_string_rindex.c +104 -0
data/ext/u/rb_u_string_rpartition.c +81 -0
data/ext/u/rb_u_string_rstrip.c +29 -0
data/ext/u/rb_u_string_scan.c +109 -0
data/ext/u/rb_u_string_script.c +253 -0
data/ext/u/rb_u_string_soft_dotted.c +13 -0
data/ext/u/rb_u_string_space.c +24 -0
data/ext/u/rb_u_string_split.c +245 -0
data/ext/u/rb_u_string_squeeze.c +75 -0
data/ext/u/rb_u_string_start_with.c +31 -0
data/ext/u/rb_u_string_strip.c +36 -0
data/ext/u/rb_u_string_sub.c +147 -0
data/ext/u/rb_u_string_times.c +35 -0
data/ext/u/rb_u_string_title.c +10 -0
data/ext/u/rb_u_string_titlecase.c +13 -0
data/ext/u/rb_u_string_to_i.c +45 -0
data/ext/u/rb_u_string_to_inum.c +364 -0
data/ext/u/rb_u_string_to_inum.h +1 -0
data/ext/u/rb_u_string_to_str.c +17 -0
data/ext/u/rb_u_string_to_sym.c +12 -0
data/ext/u/rb_u_string_tr.c +290 -0
data/ext/u/rb_u_string_upcase.c +12 -0
data/ext/u/rb_u_string_upper.c +13 -0
data/ext/u/rb_u_string_valid.c +10 -0
data/ext/u/rb_u_string_valid_encoding.c +12 -0
data/ext/u/rb_u_string_wide.c +21 -0
data/ext/u/rb_u_string_wide_cjk.c +21 -0
data/ext/u/rb_u_string_width.c +19 -0
data/ext/u/rb_u_string_word_break.c +63 -0
data/ext/u/rb_u_string_xdigit.c +22 -0
data/ext/u/rb_u_string_zero_width.c +16 -0
data/ext/u/titled.c +55 -0
data/ext/u/titled.h +1 -0
data/ext/u/u.c +23 -0
data/ext/u/u.h +458 -0
data/ext/u/u_char_canonical_combining_class.c +31 -0
data/ext/u/u_char_digit_value.c +21 -0
data/ext/u/u_char_downcase.c +27 -0
data/ext/u/u_char_general_category.c +31 -0
data/ext/u/u_char_grapheme_break.c +28 -0
data/ext/u/u_char_isalnum.c +24 -0
data/ext/u/u_char_isalpha.c +21 -0
data/ext/u/u_char_isassigned.c +16 -0
data/ext/u/u_char_iscased.c +22 -0
data/ext/u/u_char_iscaseignorable.c +29 -0
data/ext/u/u_char_iscntrl.c +17 -0
data/ext/u/u_char_isdefined.c +15 -0
data/ext/u/u_char_isdigit.c +16 -0
data/ext/u/u_char_isgraph.c +22 -0
data/ext/u/u_char_islower.c +16 -0
data/ext/u/u_char_isnewline.c +24 -0
data/ext/u/u_char_isprint.c +21 -0
data/ext/u/u_char_ispunct.c +27 -0
data/ext/u/u_char_issoftdotted.c +18 -0
data/ext/u/u_char_isspace.c +28 -0
data/ext/u/u_char_isupper.c +16 -0
data/ext/u/u_char_isvalid.c +18 -0
data/ext/u/u_char_iswide.c +18 -0
data/ext/u/u_char_iswide_cjk.c +22 -0
data/ext/u/u_char_isxdigit.c +27 -0
data/ext/u/u_char_iszerowidth.c +29 -0
data/ext/u/u_char_line_break.c +29 -0
data/ext/u/u_char_mirror.c +16 -0
data/ext/u/u_char_normalized.c +23 -0
data/ext/u/u_char_script.c +41 -0
data/ext/u/u_char_to_u.c +48 -0
data/ext/u/u_char_upcase.c +24 -0
data/ext/u/u_char_width.c +12 -0
data/ext/u/u_char_word_break.c +28 -0
data/ext/u/u_char_xdigit_value.c +31 -0
data/ext/u/u_collate.c +83 -0
data/ext/u/u_collation_key.c +132 -0
data/ext/u/u_decode.c +156 -0
data/ext/u/u_downcase.c +201 -0
data/ext/u/u_foldcase.c +68 -0
data/ext/u/u_grapheme_clusters.c +57 -0
data/ext/u/u_has_prefix.c +27 -0
data/ext/u/u_index.c +93 -0
data/ext/u/u_is_ascii_only.c +33 -0
data/ext/u/u_locale.c +40 -0
data/ext/u/u_locale.h +14 -0
data/ext/u/u_mirror.c +20 -0
data/ext/u/u_n_bytes.c +16 -0
data/ext/u/u_n_chars.c +43 -0
data/ext/u/u_normalize.c +232 -0
data/ext/u/u_normalized.c +28 -0
data/ext/u/u_offset_to_pointer.c +62 -0
data/ext/u/u_pointer_to_offset.c +23 -0
data/ext/u/u_recode.c +73 -0
data/ext/u/u_reverse.c +21 -0
data/ext/u/u_rindex.c +132 -0
data/ext/u/u_titlecase.c +68 -0
data/ext/u/u_upcase.c +89 -0
data/ext/u/u_width.c +35 -0
data/ext/u/u_words.c +82 -0
data/ext/u/yield.h +27 -0
data/lib/u-1.0.rb +20 -0
data/lib/u-1.0/buffer.rb +10 -0
data/lib/u-1.0/string.rb +9 -0
data/lib/u-1.0/version.rb +287 -0
data/test/unit/case.rb +2080 -0
data/test/unit/foldcase.rb +1136 -0
data/test/unit/graphemebreak.rb +407 -0
data/test/unit/normalize.rb +367545 -0
data/test/unit/u-1.0.rb +10 -0
data/test/unit/u-1.0/buffer.rb +52 -0
data/test/unit/u-1.0/string.rb +1439 -0
data/test/unit/{u.rb → u-1.0/version.rb} +0 -1
data/test/unit/wordbreak.rb +1083 -0
metadata +603 -148
data/README +0 -38
data/Rakefile +0 -64
data/ext/encoding/character/utf-8/break.c +0 -25
data/ext/encoding/character/utf-8/data/break.h +0 -22931
data/ext/encoding/character/utf-8/data/character-tables.h +0 -14358
data/ext/encoding/character/utf-8/data/compose.h +0 -1607
data/ext/encoding/character/utf-8/data/decompose.h +0 -10926
data/ext/encoding/character/utf-8/data/generate-unicode-data.rb +0 -1070
data/ext/encoding/character/utf-8/decompose.c +0 -444
data/ext/encoding/character/utf-8/depend +0 -65
data/ext/encoding/character/utf-8/extconf.rb +0 -67
data/ext/encoding/character/utf-8/private.h +0 -51
data/ext/encoding/character/utf-8/properties.c +0 -1056
data/ext/encoding/character/utf-8/rb_includes.h +0 -19
data/ext/encoding/character/utf-8/rb_methods.h +0 -49
data/ext/encoding/character/utf-8/rb_private.h +0 -52
data/ext/encoding/character/utf-8/rb_utf_aref.c +0 -111
data/ext/encoding/character/utf-8/rb_utf_aset.c +0 -105
data/ext/encoding/character/utf-8/rb_utf_casecmp.c +0 -24
data/ext/encoding/character/utf-8/rb_utf_chomp.c +0 -114
data/ext/encoding/character/utf-8/rb_utf_chop.c +0 -44
data/ext/encoding/character/utf-8/rb_utf_collate.c +0 -13
data/ext/encoding/character/utf-8/rb_utf_count.c +0 -30
data/ext/encoding/character/utf-8/rb_utf_delete.c +0 -60
data/ext/encoding/character/utf-8/rb_utf_downcase.c +0 -13
data/ext/encoding/character/utf-8/rb_utf_each_char.c +0 -27
data/ext/encoding/character/utf-8/rb_utf_foldcase.c +0 -13
data/ext/encoding/character/utf-8/rb_utf_hex.c +0 -14
data/ext/encoding/character/utf-8/rb_utf_index.c +0 -50
data/ext/encoding/character/utf-8/rb_utf_insert.c +0 -48
data/ext/encoding/character/utf-8/rb_utf_internal_bignum.c +0 -332
data/ext/encoding/character/utf-8/rb_utf_internal_bignum.h +0 -12
data/ext/encoding/character/utf-8/rb_utf_internal_tr.c +0 -142
data/ext/encoding/character/utf-8/rb_utf_internal_tr.h +0 -41
data/ext/encoding/character/utf-8/rb_utf_justify.c +0 -96
data/ext/encoding/character/utf-8/rb_utf_length.c +0 -14
data/ext/encoding/character/utf-8/rb_utf_lstrip.c +0 -41
data/ext/encoding/character/utf-8/rb_utf_normalize.c +0 -51
data/ext/encoding/character/utf-8/rb_utf_oct.c +0 -14
data/ext/encoding/character/utf-8/rb_utf_reverse.c +0 -13
data/ext/encoding/character/utf-8/rb_utf_rindex.c +0 -88
data/ext/encoding/character/utf-8/rb_utf_rstrip.c +0 -51
data/ext/encoding/character/utf-8/rb_utf_squeeze.c +0 -70
data/ext/encoding/character/utf-8/rb_utf_strip.c +0 -27
data/ext/encoding/character/utf-8/rb_utf_to_i.c +0 -25
data/ext/encoding/character/utf-8/rb_utf_tr.c +0 -250
data/ext/encoding/character/utf-8/rb_utf_upcase.c +0 -13
data/ext/encoding/character/utf-8/tables.h +0 -38
data/ext/encoding/character/utf-8/unicode.c +0 -319
data/ext/encoding/character/utf-8/unicode.h +0 -216
data/ext/encoding/character/utf-8/utf.c +0 -1334
data/lib/encoding/character/utf-8.rb +0 -201
data/lib/u.rb +0 -16
data/lib/u/string.rb +0 -185
data/lib/u/version.rb +0 -5
data/test/unit/u/string.rb +0 -91

data/ext/u/rb_u_string_times.c ADDED

@@ -0,0 +1,35 @@
+#include "rb_includes.h"
+/* @overload *(n)
+ *   @param [#to_int] n
+ *   @raise [ArgumentError] If N < 0
+ *   @raise [ArgumentError] If N > 0 and N × {#bytesize} > LONG_MAX
+ *   @return [U::String] The concatenation of N copies of the receiver,
+ *     inheriting any taint and untrust */
+VALUE
+rb_u_string_times(VALUE self, VALUE rbtimes)
+{
+        const struct rb_u_string *string = RVAL2USTRING(self);
+        long times = NUM2LONG(rbtimes);
+        if (times < 0)
+                rb_u_raise(rb_eArgError, "negative argument: %ld", times);
+        /* TODO: Isn’t this off by one, as we add one to length for the
+         * ALLOC_N() call? */
+        if (times > 0 && LONG_MAX / times < USTRING_LENGTH(string))
+                rb_u_raise(rb_eArgError, "argument too big: %ld", times);
+        /* LENGTH is the size of the result in bytes, not counting the
+         * terminating NUL that is appended below. */
+        long length = times * USTRING_LENGTH(string);
+        char *product = ALLOC_N(char, length + 1);
+        /* Only copy when there is something to produce.  LENGTH is zero both
+         * for an empty receiver and for TIMES = 0, and in the latter case
+         * PRODUCT only has room for the terminator. */
+        if (length > 0) {
+                /* Seed the buffer with one copy of the receiver, then keep
+                 * doubling the filled prefix for as long as a whole doubling
+                 * still fits in LENGTH bytes. */
+                long i = USTRING_LENGTH(string);
+                memcpy(product, USTRING_STR(string), i);
+                for ( ; i <= length / 2; i *= 2)
+                        memcpy(product + i, product, i);
+                /* Fill whatever is left (fewer than I bytes) from the start of
+                 * the already filled prefix. */
+                memcpy(product + i, product, length - i);
+        }
+        product[length] = '\0';
+        return rb_u_string_new_c_own(self, product, length);
+}

data/ext/u/rb_u_string_title.c ADDED

@@ -0,0 +1,10 @@
+#include "rb_includes.h"
+/* @overload title?
+ *   @return [Boolean] True if the receiver contains only characters in the
+ *     general category Letter, Titlecase (Lt) */
+VALUE
+rb_u_string_title(VALUE self)
+{
+        /* All of the work is done by _rb_u_character_test(), defined
+         * elsewhere, which is handed the receiver and u_char_istitle. */
+        return _rb_u_character_test(self, u_char_istitle);
+}

data/ext/u/rb_u_string_titlecase.c ADDED

@@ -0,0 +1,13 @@
+#include "rb_includes.h"
+/* @overload titlecase(locale = ENV['LC_CTYPE'])
+ *   @param [#to_str] locale
+ *   @return [U::String] The title-casing of the receiver according to the
+ *     rules of the language of LOCALE, which may be empty to specifically use
+ *     the default, language-independent, rules, inheriting any taint and
+ *     untrust */
+VALUE
+rb_u_string_titlecase(int argc, VALUE *argv, VALUE self)
+{
+        /* Argument handling and the conversion itself are delegated to
+         * _rb_u_string_convert_locale(), defined elsewhere, with u_titlecase
+         * as the function to apply.
+         * NOTE(review): the meaning of the trailing NULL argument is not
+         * visible from here — confirm against the helper’s declaration. */
+        return _rb_u_string_convert_locale(argc, argv, self, u_titlecase, NULL);
+}

data/ext/u/rb_u_string_to_i.c ADDED

@@ -0,0 +1,45 @@
+#include "rb_includes.h"
+#include "rb_u_string_to_inum.h"
+/* @overload to_i(base = 10)
+ *
+ *   Returns the Integer value that results from treating the receiver as a
+ *   string of digits in BASE.
+ *
+ *   The conversion algorithm is
+ *
+ *   1. Skip any leading {#space?}s
+ *   2. Check for an optional sign, ‘+’ or ‘-’
+ *   3. If base is 2, skip an optional “0b” or “0B” prefix
+ *   4. If base is 8, skip an optional “0o” or “0O” prefix
+ *   5. If base is 10, skip an optional “0d” or “0D” prefix
+ *   6. If base is 16, skip an optional “0x” or “0X” prefix
+ *   7. Skip any ‘0’s
+ *   8. Read as long a sequence of digits in BASE as possible, separated by
+ *      optional U+005F LOW LINE characters, using letters in the following
+ *      ranges of characters for digits, or the character’s digit value, if
+ *      any
+ *
+ *       * U+0041 LATIN CAPITAL LETTER A through U+005A LATIN CAPITAL LETTER Z
+ *       * U+0061 LATIN SMALL LETTER A through U+007A LATIN SMALL LETTER Z
+ *       * U+FF21 FULLWIDTH LATIN CAPITAL LETTER A through U+FF3A FULLWIDTH LATIN CAPITAL LETTER Z
+ *       * U+FF41 FULLWIDTH LATIN SMALL LETTER A through U+FF5A FULLWIDTH LATIN SMALL LETTER Z
+ *
+ *      Note that only one separator is allowed in a row.
+ *
+ *   @param [#to_int] base
+ *   @raise [ArgumentError] Unless 2 ≤ BASE ≤ 36
+ *   @return [Integer] */
+VALUE
+rb_u_string_to_i(int argc, VALUE *argv, VALUE self)
+{
+        VALUE rbbase;
+        /* The base is optional; fall back on decimal when it is left out. */
+        int base = rb_scan_args(argc, argv, "01", &rbbase) == 1 ?
+                NUM2INT(rbbase) :
+                10;
+        /* Only negative bases are rejected here; the remaining validation is
+         * left to rb_u_string_to_inum(), which is called without
+         * verification of the string’s contents. */
+        if (base < 0)
+                rb_u_raise(rb_eArgError, "illegal radix %d", base);
+        return rb_u_string_to_inum(self, base, false);
+}

data/ext/u/rb_u_string_to_inum.c ADDED

@@ -0,0 +1,364 @@
+#include "rb_includes.h"
+#include "rb_u_string_to_inum.h"
+/* XXX: Stolen straight from bignum.c. */
+/* Shorthand for RBIGNUM_DIGITS() applied to X. */
+#define BDIGITS(x)      (RBIGNUM_DIGITS(x))
+/* SIZEOF_BDIGITS expressed in bits instead of in chars. */
+#define BITSPERDIG      (SIZEOF_BDIGITS * CHAR_BIT)
+/* Two to the power of BITSPERDIG, computed as a BDIGIT_DBL. */
+#define BIGRAD          ((BDIGIT_DBL)1 << BITSPERDIG)
+/* X shifted right by BITSPERDIG bits, that is, the part of X that carries
+ * over into the next digit. */
+#define BIGDN(x)        RSHIFT((x), BITSPERDIG)
+/* The low BITSPERDIG bits of X, as a BDIGIT. */
+#define BIGLO(x)        ((BDIGIT)((x) & (BIGRAD - 1)))
+/* Create a new T_BIGNUM object of class KLASS with room for LEN digits.  The
+ * sign flag is set to 1 if SIGN is non-zero and to 0 otherwise.  Nothing in
+ * this function writes to the digits themselves; the caller further down in
+ * this file clears them with MEMZERO() after the call. */
+static VALUE
+bignew_1(VALUE klass, long len, int sign)
+{
+    NEWOBJ(big, struct RBignum);
+    OBJSETUP(big, klass, T_BIGNUM);
+    RBIGNUM_SET_SIGN(big, sign ? 1 : 0);
+/* Two layouts of struct RBignum are supported, selected by whether
+ * RBIGNUM_EMBED_LEN_MAX is defined. */
+#ifdef RBIGNUM_EMBED_LEN_MAX
+/* Store length L in B: in the flags word when B has RBIGNUM_EMBED_FLAG set,
+ * otherwise in as.heap.len. */
+#define RBIGNUM_SET_LEN(b,l) \
+    ((RBASIC(b)->flags & RBIGNUM_EMBED_FLAG) ? \
+     (void)(RBASIC(b)->flags = \
+            (RBASIC(b)->flags & ~RBIGNUM_EMBED_LEN_MASK) | \
+            ((l) << RBIGNUM_EMBED_LEN_SHIFT)) : \
+     (void)(RBIGNUM(b)->as.heap.len = (l)))
+    /* Short enough: mark the object as embedded and record the length in
+     * the flags; no separate digit array is allocated. */
+    if (len <= RBIGNUM_EMBED_LEN_MAX) {
+        RBASIC(big)->flags |= RBIGNUM_EMBED_FLAG;
+        RBIGNUM_SET_LEN(big, len);
+    }
+    /* Otherwise allocate a separate array of LEN digits. */
+    else {
+        RBIGNUM(big)->as.heap.digits = ALLOC_N(BDIGIT, len);
+        RBIGNUM(big)->as.heap.len = len;
+    }
+#else
+    /* Layout without embedding: always allocate the digit array. */
+    big->len = len;
+    big->digits = ALLOC_N(BDIGIT, len);
+#endif
+    return (VALUE)big;
+}
+/* Convenience wrapper that creates the object with class rb_cBignum. */
+#define bignew(len, sign) bignew_1(rb_cBignum, len, sign)
+/* Look for an optional sign at the start of S.  *SIGN is set to 0 for ‘-’
+ * and to 1 otherwise.  Returns a pointer to the byte after the sign, or S
+ * itself if S does not begin with a sign. */
+static const char *
+rb_u_string_to_inum_sign(const char *s, int *sign)
+{
+        switch (*s) {
+        case '-':
+                *sign = 0;
+                return s + 1;
+        case '+':
+                *sign = 1;
+                return s + 1;
+        default:
+                *sign = 1;
+                return s;
+        }
+}
+/* Deal with an optional radix prefix (“0x”, “0b”, “0o”, “0d”, in either
+ * case) at the start of S.
+ *
+ * On entry *BASE is the base that was asked for.
+ *
+ * If *BASE is positive, it is left alone when S begins with ‘0’, and a prefix
+ * is only skipped if it is the one that belongs to that base.  This keeps,
+ * for example, the “b” of “0b1” available as a digit when parsing in base
+ * 16.  Leading ‘0’s are left for the caller to skip.
+ *
+ * If *BASE is zero or negative and S begins with ‘0’, the base is taken from
+ * the prefix, which is skipped.  A ‘0’ that is not followed by one of the
+ * prefix letters selects base 8 and only the ‘0’ itself is skipped.
+ *
+ * If S does not begin with ‘0’, *BASE is set to its negation when it is less
+ * than -1 and to 10 otherwise, and nothing is skipped.
+ *
+ * Returns a pointer to the first byte of S that was not skipped. */
+static const char *
+rb_u_string_to_inum_base(const char *s, int *base)
+{
+        if (s[0] != '0') {
+                *base = *base < -1 ? -*base : 10;
+                return s;
+        }
+        /* Which base, if any, does the character after the ‘0’ select? */
+        int prefix_base;
+        switch (s[1]) {
+        case 'x': case 'X':
+                prefix_base = 16;
+                break;
+        case 'b': case 'B':
+                prefix_base = 2;
+                break;
+        case 'o': case 'O':
+                prefix_base = 8;
+                break;
+        case 'd': case 'D':
+                prefix_base = 10;
+                break;
+        default:
+                prefix_base = 0;
+                break;
+        }
+        /* An explicitly requested base only accepts its own prefix. */
+        if (*base > 0)
+                return prefix_base == *base ? s + 2 : s;
+        /* No base was requested, so let the prefix decide; a lone ‘0’ means
+         * octal. */
+        if (prefix_base == 0) {
+                *base = 8;
+                return s + 1;
+        }
+        *base = prefix_base;
+        return s + 2;
+}
+/* Return an upper bound on the number of bits needed to store the number
+ * written in S using BASE: the number of bits that a single digit in BASE
+ * may need (rounded up, and for some bases over-estimated) multiplied by the
+ * number of characters in S as reported by u_n_chars().
+ *
+ * Raises ArgumentError unless 2 ≤ BASE ≤ 36. */
+static size_t
+rb_u_string_to_inum_base_bit_length(const char *s, int base)
+{
+        if (base < 2 || base > 36)
+                rb_u_raise(rb_eArgError, "illegal radix %d", base);
+        size_t bit_length;
+        /* Each case must end in a break; without them every base would fall
+         * through to the largest estimate. */
+        switch (base) {
+        case 2:
+                bit_length = 1;
+                break;
+        case 3:
+                bit_length = 2;
+                break;
+        case 4: case 5: case 6: case 7: case 8:
+                bit_length = 3;
+                break;
+        case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:
+                bit_length = 4;
+                break;
+        default:
+                /* 17 through 32 fit in five bits, 33 through 36 need six. */
+                bit_length = base <= 32 ? 5 : 6;
+                break;
+        }
+        return bit_length * u_n_chars(s);
+}
+/* Decide whether C is a digit separator (‘_’) that the caller should skip.
+ *
+ * *PREVIOUS_WAS_SEPARATOR carries state between calls: it is set when a
+ * separator is accepted and cleared when C is not a separator.
+ *
+ * A second separator in a row is not accepted.  Without VERIFY, false is
+ * returned and the state is left as it was; with VERIFY, an ArgumentError is
+ * raised that reports the offset of S in STR.
+ *
+ * Returns true if C was accepted as a separator, false otherwise. */
+static bool
+rb_u_string_to_inum_num_separator(const char *str, const char *s, bool verify,
+                                  uint32_t c, bool *previous_was_separator)
+{
+        const bool is_separator = c == '_';
+        const bool repeated = is_separator && *previous_was_separator;
+        if (repeated && !verify)
+                return false;
+        if (repeated) {
+                char utf8[U_CHAR_MAX_BYTE_LENGTH];
+                int n = u_char_to_u(c, utf8);
+                rb_u_raise(rb_eArgError,
+                           "unexpected ‘%.*s’ found at position %ld",
+                           n, utf8, u_pointer_to_offset(str, s));
+        }
+        *previous_was_separator = is_separator;
+        return is_separator;
+}
+#define FULLWIDTH_A ((uint32_t)0xff21)
+#define FULLWIDTH_Z ((uint32_t)0xff3a)
+#define FULLWIDTH_a ((uint32_t)0xff41)
+#define FULLWIDTH_z ((uint32_t)0xff5a)
+/* Return the value of C when used as a digit: letters in the ranges ‘a’–‘z’,
+ * ‘A’–‘Z’, FULLWIDTH_a–FULLWIDTH_z, and FULLWIDTH_A–FULLWIDTH_Z count from 10
+ * upwards from the start of their range; anything else gets whatever
+ * u_char_digit_value() returns for it. */
+static int
+u_char_zdigit_value(uint32_t c)
+{
+        static const struct {
+                uint32_t first;
+                uint32_t last;
+        } letters[] = {
+                { 'a', 'z' },
+                { 'A', 'Z' },
+                { FULLWIDTH_a, FULLWIDTH_z },
+                { FULLWIDTH_A, FULLWIDTH_Z },
+        };
+        for (size_t i = 0; i < sizeof(letters) / sizeof(letters[0]); i++)
+                if (letters[i].first <= c && c <= letters[i].last)
+                        return c - letters[i].first + 10;
+        return u_char_digit_value(c);
+}
+/* Work out the value of C as a digit in BASE and store it in *DIGIT_VALUE.
+ *
+ * Returns false, without touching *DIGIT_VALUE, if C is a space, so that the
+ * caller can stop and deal with trailing white-space itself.  A character
+ * that has no digit value, or whose value is not less than BASE, also gives
+ * false when VERIFY is false; when VERIFY is true it raises an ArgumentError
+ * that reports the offset of S in STR.
+ *
+ * Returns true if a digit value was stored. */
+static bool
+rb_u_string_to_inum_digit_value(const char *str, const char *s, uint32_t c,
+                                int base, bool verify, int *digit_value)
+{
+        if (u_char_isspace(c))
+                return false;
+        const int digit = u_char_zdigit_value(c);
+        const bool unknown = digit == -1;
+        const bool too_large = digit >= base;
+        /* Without verification any unusable character simply ends the
+         * number. */
+        if (!verify && (unknown || too_large))
+                return false;
+        if (unknown) {
+                char utf8[U_CHAR_MAX_BYTE_LENGTH];
+                int n = u_char_to_u(c, utf8);
+                rb_u_raise(rb_eArgError,
+                           "non-digit character ‘%.*s’ found at position %ld",
+                           n, utf8, u_pointer_to_offset(str, s));
+        }
+        if (too_large)
+                rb_u_raise(rb_eArgError,
+                           "value (%d) greater than base (%d) at position %ld",
+                           digit, base, u_pointer_to_offset(str, s));
+        *digit_value = digit;
+        return true;
+}
+/* Parse the digits starting at S in BASE, accumulating them in an unsigned
+ * long, and return the result as a Ruby Integer.  STR is the start of the
+ * whole string and is only used for reporting offsets in error messages.
+ * SIGN is non-zero for a positive number and zero for a negative one.  With
+ * VERIFY set, anything but white-space after the digits raises an
+ * ArgumentError. */
+static VALUE
+rb_u_string_to_inum_as_fix(const char *str, const char *s, int sign, int base,
+                           bool verify)
+{
+        unsigned long value = 0;
+        bool previous_was_separator = false;
+        while (*s != '\0') {
+                /* S is passed both as the output and the input position, so it
+                 * is updated by this call — presumably to point past the
+                 * decoded character; u_decode() is defined elsewhere. */
+                uint32_t c = u_decode(&s, s, s + 4);
+                if (rb_u_string_to_inum_num_separator(str, s, verify, c, &previous_was_separator))
+                        continue;
+                int digit_value;
+                if (!rb_u_string_to_inum_digit_value(str, s, c, base, verify, &digit_value))
+                        break;
+                /* NOTE(review): nothing here guards against VALUE wrapping
+                 * around; the caller’s bit-length estimate is what is meant
+                 * to keep it in range — confirm. */
+                value *= base;
+                value += digit_value;
+        }
+        if (verify) {
+                /* Skip over white-space; anything else left over is an
+                 * error. */
+                const char *t;
+                while (*s != '\0' && u_char_isspace(u_decode(&t, s, s + 4)))
+                        s = t;
+                if (*s != '\0')
+                        rb_u_raise(rb_eArgError,
+                                   "trailing garbage found at position %ld",
+                                   u_pointer_to_offset(str, s));
+        }
+        /* Use a Fixnum if the magnitude allows for it, otherwise build a
+         * Bignum from the magnitude and set its sign afterwards. */
+        if (POSFIXABLE(value))
+                return sign ? LONG2FIX(value) : LONG2FIX(-(long)value);
+        VALUE big = rb_uint2big(value);
+        RBIGNUM_SET_SIGN(big, sign);
+        return rb_big_norm(big);
+}
+/* Parse the NUL-terminated string STR as an integer and return it as a Ruby
+ * Integer.
+ *
+ * A BASE that is zero or negative is replaced by the base that
+ * rb_u_string_to_inum_base() comes up with.  When VERIFY is true, malformed
+ * input raises an ArgumentError; when it is false, parsing ends at the first
+ * character that cannot be used and what has been read so far is returned
+ * (0 for a doubled sign).
+ *
+ * Short inputs are handed to rb_u_string_to_inum_as_fix(); longer ones are
+ * accumulated digit by digit into a Bignum. */
+static VALUE
+rb_cutf_to_inum(const char * const str, int base, bool verify)
+{
+        /* FIXME: How can this even happen? */
+        if (str == NULL) {
+                if (verify)
+                        rb_invalid_str(str, "Integer");
+                return INT2FIX(0);
+        }
+        const char *s = str;
+        const char *t;
+        /* Skip any leading whitespace. */
+        while (u_char_isspace(u_decode(&t, s, s + 4)))
+                s = t;
+        /* Figure out what sign this number uses. */
+        int sign;
+        s = rb_u_string_to_inum_sign(s, &sign);
+        /* Do we have another sign?  If so, that’s not correct. */
+        if (*s == '+' || *s == '-') {
+                if (verify)
+                        rb_u_raise(rb_eArgError,
+                                   "extra sign ‘%c’ found at position %ld",
+                                   *s, u_pointer_to_offset(str, s));
+                return INT2FIX(0);
+        }
+        /* Let the helper look at a copy of the base, so that what it decides
+         * on is only used when no usable base was passed in.  The helper may
+         * also move S forward. */
+        int tmp_base = base;
+        s = rb_u_string_to_inum_base(s, &tmp_base);
+        if (base <= 0)
+                base = tmp_base;
+        /* Remove preceding 0s. */
+        while (*s == '0')
+                s++;
+        /* Figure out how many bits we need to represent the number.  Note
+         * that this is based on STR, not S, so anything skipped above is
+         * included in the estimate. */
+        size_t bit_length = rb_u_string_to_inum_base_bit_length(str, base);
+        /* If the bit_length is less than the number of bits in a VALUE we can
+         * try to store it as a FIXNUM. */
+        if (bit_length <= sizeof(VALUE) * CHAR_BIT)
+                return rb_u_string_to_inum_as_fix(str, s, sign, base, verify);
+        /* NOTE(review): this tests the first byte of STR but reports the
+         * offset of S — confirm which of the two is intended. */
+        if (verify && *str == '_')
+                rb_u_raise(rb_eArgError,
+                           "leading digit-separator ‘_’ found at position %ld",
+                           u_pointer_to_offset(str, s));
+        /* From here on bit_length holds a number of BDIGITs, not of bits. */
+        bit_length = bit_length / BITSPERDIG + 1;
+        /* TODO: Rename these variables. */
+        VALUE z = bignew(bit_length, sign);
+        BDIGIT *zds = BDIGITS(z);
+        MEMZERO(zds, BDIGIT, bit_length);
+        /* Number of digits of ZDS that are currently in use. */
+        int big_len = 1;
+        bool previous_was_separator = false;
+        /* NOTE(review): there is no explicit test for the terminating NUL in
+         * this loop; it ends when the digit-value helper returns false —
+         * confirm what that helper does for U+0000 when VERIFY is set. */
+        while (true) {
+                uint32_t c = u_decode(&s, s, s + 4);
+                if (rb_u_string_to_inum_num_separator(str, s, verify, c, &previous_was_separator))
+                        continue;
+                int digit_value;
+                if (!rb_u_string_to_inum_digit_value(str, s, c, base, verify, &digit_value))
+                        break;
+                /* Multiply the number so far by BASE and add the new digit,
+                 * one BDIGIT at a time with NUM carrying over between them.
+                 * If there is still a carry after the last digit in use,
+                 * take another digit into use and go on from there.
+                 * NOTE(review): big_len is not checked against the number of
+                 * digits allocated; this relies on the estimate above being
+                 * large enough. */
+                int i = 0;
+                BDIGIT_DBL num = digit_value;
+                while (true) {
+                        for ( ; i < big_len; i++) {
+                                num += (BDIGIT_DBL)zds[i] * base;
+                                zds[i] = BIGLO(num);
+                                num = BIGDN(num);
+                        }
+                        if (num == 0)
+                                break;
+                        big_len++;
+                }
+        }
+        if (!verify)
+                return rb_big_norm(z);
+        /* Step back one byte to look at what ended the loop.
+         * NOTE(review): presumably this assumes that the character that
+         * ended the loop was a single byte long — confirm. */
+        s--;
+        if (str + 1 < s && s[-1] == '_')
+                rb_u_raise(rb_eArgError,
+                           "trailing digit-separator ‘_’ found at position %ld",
+                           u_pointer_to_offset(str, s));
+        if (*s != '\0')
+                rb_u_raise(rb_eArgError,
+                           "trailing garbage found at position %ld",
+                           u_pointer_to_offset(str, s));
+        return rb_big_norm(z);
+}
+/* Convert the content of SELF to a Ruby Integer in BASE by way of
+ * rb_cutf_to_inum().
+ *
+ * With VERIFY set, an ArgumentError is raised if the content is missing or
+ * contains a null byte within its length; whatever else VERIFY implies is up
+ * to rb_cutf_to_inum().
+ *
+ * If the content isn’t followed by a null byte, a null-terminated copy is
+ * made for the duration of the conversion. */
+VALUE
+rb_u_string_to_inum(VALUE self, int base, bool verify)
+{
+        const struct rb_u_string *string = RVAL2USTRING(self);
+        const char *s = USTRING_STR(string);
+        if (verify && (s == NULL || memchr(s, '\0', USTRING_LENGTH(string))))
+                rb_u_raise(rb_eArgError, "string contains null byte");
+        /* Null-terminated copy of the content, if one turns out to be
+         * needed. */
+        char *copy = NULL;
+        if (s != NULL) {
+                long len = USTRING_LENGTH(string);
+                /* no sentinel somehow */
+                if (s[len] != '\0') {
+                        copy = ALLOC_N(char, len + 1);
+                        MEMCPY(copy, s, char, len);
+                        copy[len] = '\0';
+                        s = copy;
+                }
+        }
+        VALUE result = rb_cutf_to_inum(s, base, verify);
+        /* Memory from ALLOC_N() has to be given back with xfree(), not
+         * free().
+         * NOTE(review): if rb_cutf_to_inum() raises, this is never reached
+         * and the copy is not released. */
+        if (copy != NULL)
+                xfree(copy);
+        return result;
+}