character_set 1.1.1 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +22 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +11 -0
- data/BENCHMARK.md +53 -17
- data/CHANGELOG.md +47 -0
- data/README.md +38 -14
- data/Rakefile +60 -36
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +5 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +17 -6
- data/ext/character_set/character_set.c +963 -414
- data/ext/character_set/unicode_casefold_table.h +10 -2
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/character.rb +1 -1
- data/lib/character_set/core_ext/regexp_ext.rb +1 -1
- data/lib/character_set/core_ext/string_ext.rb +3 -1
- data/lib/character_set/expression_converter.rb +25 -27
- data/lib/character_set/parser.rb +1 -1
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +666 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +151 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/ruby_fallback.rb +5 -3
- data/lib/character_set/ruby_fallback/character_set_methods.rb +53 -6
- data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
- data/lib/character_set/shared_methods.rb +60 -49
- data/lib/character_set/version.rb +1 -1
- data/lib/character_set/writer.rb +98 -27
- metadata +88 -22
- data/.travis.yml +0 -11
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
        data/benchmarks/count_in.rb
    ADDED
    
| @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            require_relative './shared'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            str = 'Lorem ipsum et dolorem'
         | 
| 4 | 
            +
            tr = '^A-Za-z'
         | 
| 5 | 
            +
            cs = CharacterSet.non_ascii_letter
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            benchmark(
         | 
| 8 | 
            +
              caption: 'Counting non-letters',
         | 
| 9 | 
            +
              cases: {
         | 
| 10 | 
            +
                'String#count'          => -> { str.count(tr) },
         | 
| 11 | 
            +
                'CharacterSet#count_in' => -> { cs.count_in(str) },
         | 
| 12 | 
            +
              }
         | 
| 13 | 
            +
            )
         | 
    
        data/benchmarks/delete_in.rb
    CHANGED
    
    | @@ -14,7 +14,7 @@ benchmark( | |
| 14 14 |  | 
| 15 15 | 
             
            str = 'Lörem ipsüm ⛷ et dölörem'
         | 
| 16 16 | 
             
            rx = /[\s\p{emoji}äüö]/
         | 
| 17 | 
            -
            cs = CharacterSet.whitespace + CharacterSet.emoji +  | 
| 17 | 
            +
            cs = CharacterSet.whitespace + CharacterSet.emoji + CharacterSet['ä', 'ö', 'ü']
         | 
| 18 18 |  | 
| 19 19 | 
             
            benchmark(
         | 
| 20 20 | 
             
              caption: 'Removing whitespace, emoji and umlauts',
         | 
    
        data/benchmarks/scan.rb
    ADDED
    
    | @@ -0,0 +1,13 @@ | |
| 1 | 
            +
            require_relative './shared'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            str = 'Lorem ipsum ⛷ et dolorem'
         | 
| 4 | 
            +
            rx = /\p{emoji}/
         | 
| 5 | 
            +
            cs = CharacterSet.emoji
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            benchmark(
         | 
| 8 | 
            +
              caption: 'Extracting emoji to an Array',
         | 
| 9 | 
            +
              cases: {
         | 
| 10 | 
            +
                'String#scan'       => -> { str.scan(rx) },
         | 
| 11 | 
            +
                'CharacterSet#scan' => -> { cs.scan(str) },
         | 
| 12 | 
            +
              }
         | 
| 13 | 
            +
            )
         | 
    
        data/benchmarks/shared.rb
    CHANGED
    
    | @@ -3,6 +3,11 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) | |
| 3 3 |  | 
| 4 4 | 
             
            require 'benchmark/ips'
         | 
| 5 5 | 
             
            require 'character_set'
         | 
| 6 | 
            +
            if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
         | 
| 7 | 
            +
              require 'sorted_set'
         | 
| 8 | 
            +
            else
         | 
| 9 | 
            +
              require 'set'
         | 
| 10 | 
            +
            end
         | 
| 6 11 |  | 
| 7 12 | 
             
            def benchmark(caption: nil, cases: {})
         | 
| 8 13 | 
             
              puts caption
         | 
    
        data/benchmarks/z_add.rb
    ADDED
    
    
        data/benchmarks/z_delete.rb
    ADDED
    
| @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            require_relative './shared'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            cs = CharacterSet.new(0..0x10FFFF)
         | 
| 4 | 
            +
            ss = SortedSet.new(0..0x10FFFF)
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            benchmark(
         | 
| 7 | 
            +
              caption: 'Removing entries',
         | 
| 8 | 
            +
              cases: {
         | 
| 9 | 
            +
                'CharacterSet#delete' => -> { cs.delete(rand(0x10FFFF)) },
         | 
| 10 | 
            +
                'SortedSet#delete'    => -> { ss.delete(rand(0x10FFFF)) },
         | 
| 11 | 
            +
              }
         | 
| 12 | 
            +
            )
         | 
    
        data/benchmarks/z_merge.rb
    ADDED
    
| @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            require_relative './shared'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            cs1 = CharacterSet.new(0...0x88000)
         | 
| 4 | 
            +
            cs2 = CharacterSet.new(0x88000..0x10FFFF)
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            ss1 = SortedSet.new(0...0x88000)
         | 
| 7 | 
            +
            ss2 = SortedSet.new(0x88000..0x10FFFF)
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            benchmark(
         | 
| 10 | 
            +
              caption: 'Merging entries',
         | 
| 11 | 
            +
              cases: {
         | 
| 12 | 
            +
                'CharacterSet#merge' => -> { cs1.merge(cs2) },
         | 
| 13 | 
            +
                'SortedSet#merge'    => -> { ss1.merge(ss2) },
         | 
| 14 | 
            +
              }
         | 
| 15 | 
            +
            )
         | 
    
        data/benchmarks/z_minmax.rb
    ADDED
    
| @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            require_relative './shared'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            cs = CharacterSet.new(0..0xFFFF)
         | 
| 4 | 
            +
            ss = SortedSet.new(0..0xFFFF)
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            benchmark(
         | 
| 7 | 
            +
              caption: 'Getting the min and max',
         | 
| 8 | 
            +
              cases: {
         | 
| 9 | 
            +
                'CharacterSet#minmax' => -> { cs.minmax },
         | 
| 10 | 
            +
                'SortedSet#minmax'    => -> { ss.minmax },
         | 
| 11 | 
            +
              }
         | 
| 12 | 
            +
            )
         | 
    
        data/bin/console
    CHANGED
    
    
    
        data/character_set.gemspec
    CHANGED
    
    | @@ -10,7 +10,7 @@ Gem::Specification.new do |s| | |
| 10 10 | 
             
              s.email         = ['janosch84@gmail.com']
         | 
| 11 11 |  | 
| 12 12 | 
             
              s.summary       = 'Build, read, write and compare sets of Unicode codepoints.'
         | 
| 13 | 
            -
              s.homepage      = 'https://github.com/ | 
| 13 | 
            +
              s.homepage      = 'https://github.com/jaynetics/character_set'
         | 
| 14 14 | 
             
              s.license       = 'MIT'
         | 
| 15 15 |  | 
| 16 16 | 
             
              s.files         = `git ls-files -z`.split("\x0").reject do |f|
         | 
| @@ -22,12 +22,23 @@ Gem::Specification.new do |s| | |
| 22 22 |  | 
| 23 23 | 
             
              s.required_ruby_version = '>= 2.1.0'
         | 
| 24 24 |  | 
| 25 | 
            +
              # SortedSet, needed for RubyFallback, was moved to a gem in Ruby 3.
         | 
| 26 | 
            +
              # This dependency is only used if the C extension is unavailable.
         | 
| 27 | 
            +
              # JRuby has it in the stdlib.
         | 
| 28 | 
            +
              if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
         | 
| 29 | 
            +
                s.add_dependency 'sorted_set', '~> 1.0'
         | 
| 30 | 
            +
              end
         | 
| 31 | 
            +
             | 
| 25 32 | 
             
              s.add_development_dependency 'benchmark-ips', '~> 2.7'
         | 
| 26 | 
            -
              s.add_development_dependency ' | 
| 27 | 
            -
              s.add_development_dependency 'rake', '~>  | 
| 28 | 
            -
              s.add_development_dependency 'rake-compiler', '~> 1. | 
| 33 | 
            +
              s.add_development_dependency 'get_process_mem', '~> 0.2.3'
         | 
| 34 | 
            +
              s.add_development_dependency 'rake', '~> 13.0'
         | 
| 35 | 
            +
              s.add_development_dependency 'rake-compiler', '~> 1.1'
         | 
| 29 36 | 
             
              s.add_development_dependency 'range_compressor', '~> 1.0'
         | 
| 30 | 
            -
              s.add_development_dependency 'regexp_parser', '~> 1. | 
| 31 | 
            -
              s.add_development_dependency 'regexp_property_values', '~> 0 | 
| 37 | 
            +
              s.add_development_dependency 'regexp_parser', '~> 1.6'
         | 
| 38 | 
            +
              s.add_development_dependency 'regexp_property_values', '~> 1.0'
         | 
| 32 39 | 
             
              s.add_development_dependency 'rspec', '~> 3.8'
         | 
| 40 | 
            +
              if RUBY_VERSION.to_f >= 2.7
         | 
| 41 | 
            +
                s.add_development_dependency 'codecov', '~> 0.2.12'
         | 
| 42 | 
            +
                s.add_development_dependency 'rubocop', '~> 1.8'
         | 
| 43 | 
            +
              end
         | 
| 33 44 | 
             
            end
         | 
    
        data/ext/character_set/character_set.c
    CHANGED
    
| @@ -2,81 +2,180 @@ | |
| 2 2 | 
             
            #include "ruby/encoding.h"
         | 
| 3 3 | 
             
            #include "unicode_casefold_table.h"
         | 
| 4 4 |  | 
| 5 | 
            -
            #define  | 
| 6 | 
            -
            #define  | 
| 7 | 
            -
            #define  | 
| 5 | 
            +
            #define UNICODE_PLANE_SIZE 0x10000
         | 
| 6 | 
            +
            #define UNICODE_PLANE_COUNT 17
         | 
| 7 | 
            +
            #define UNICODE_CP_COUNT (UNICODE_PLANE_SIZE * UNICODE_PLANE_COUNT)
         | 
| 8 8 |  | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 9 | 
            +
            // start at ascii size
         | 
| 10 | 
            +
            #define CS_DEFAULT_INITIAL_LEN 128
         | 
| 11 11 |  | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 12 | 
            +
            typedef char cs_ar;
         | 
| 13 | 
            +
            typedef unsigned long cs_cp;
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            struct cs_data
         | 
| 16 | 
            +
            {
         | 
| 17 | 
            +
              cs_ar *cps;
         | 
| 18 | 
            +
              cs_cp len;
         | 
| 19 | 
            +
            };
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            #define CS_MSIZE(len) (sizeof(cs_ar) * (len / 8))
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            static inline void
         | 
| 24 | 
            +
            add_memspace_for_another_plane(struct cs_data *data)
         | 
| 25 | 
            +
            {
         | 
| 26 | 
            +
              data->cps = ruby_xrealloc(data->cps, CS_MSIZE(data->len + UNICODE_PLANE_SIZE));
         | 
| 27 | 
            +
              memset(data->cps + CS_MSIZE(data->len), 0, CS_MSIZE(UNICODE_PLANE_SIZE));
         | 
| 28 | 
            +
              data->len += UNICODE_PLANE_SIZE;
         | 
| 29 | 
            +
            }
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            static inline void
         | 
| 32 | 
            +
            ensure_memsize_fits(struct cs_data *data, cs_cp target_cp)
         | 
| 33 | 
            +
            {
         | 
| 34 | 
            +
              while (target_cp >= data->len)
         | 
| 35 | 
            +
              {
         | 
| 36 | 
            +
                add_memspace_for_another_plane(data);
         | 
| 37 | 
            +
              }
         | 
| 38 | 
            +
            }
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            static inline void
         | 
| 41 | 
            +
            set_cp(struct cs_data *data, cs_cp cp)
         | 
| 42 | 
            +
            {
         | 
| 43 | 
            +
              ensure_memsize_fits(data, cp);
         | 
| 44 | 
            +
              data->cps[cp >> 3] |= (1 << (cp & 0x07));
         | 
| 45 | 
            +
            }
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            static inline int
         | 
| 48 | 
            +
            tst_cp(cs_ar *cps, cs_cp len, cs_cp cp)
         | 
| 49 | 
            +
            {
         | 
| 50 | 
            +
              return ((cp < len) && cps[cp >> 3] & (1 << (cp & 0x07)));
         | 
| 51 | 
            +
            }
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            static inline void
         | 
| 54 | 
            +
            clr_cp(cs_ar *cps, cs_cp len, cs_cp cp)
         | 
| 55 | 
            +
            {
         | 
| 56 | 
            +
              if (cp < len)
         | 
| 57 | 
            +
              {
         | 
| 58 | 
            +
                cps[cp >> 3] &= ~(1 << (cp & 0x07));
         | 
| 59 | 
            +
              }
         | 
| 60 | 
            +
            }
         | 
| 16 61 |  | 
| 17 62 | 
             
            static void
         | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 63 | 
            +
            cs_free(void *ptr)
         | 
| 64 | 
            +
            {
         | 
| 65 | 
            +
              struct cs_data *data = ptr;
         | 
| 66 | 
            +
              ruby_xfree(data->cps);
         | 
| 67 | 
            +
              ruby_xfree(data);
         | 
| 20 68 | 
             
            }
         | 
| 21 69 |  | 
| 22 70 | 
             
            static size_t
         | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
                . | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 71 | 
            +
            cs_memsize(const void *ptr)
         | 
| 72 | 
            +
            {
         | 
| 73 | 
            +
              const struct cs_data *data = ptr;
         | 
| 74 | 
            +
              return sizeof(*data) + CS_MSIZE(data->len);
         | 
| 75 | 
            +
            }
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            static const rb_data_type_t cs_type = {
         | 
| 78 | 
            +
                .wrap_struct_name = "character_set",
         | 
| 79 | 
            +
                .function = {
         | 
| 80 | 
            +
                    .dmark = NULL,
         | 
| 81 | 
            +
                    .dfree = cs_free,
         | 
| 82 | 
            +
                    .dsize = cs_memsize,
         | 
| 83 | 
            +
                },
         | 
| 84 | 
            +
                .data = NULL,
         | 
| 85 | 
            +
                .flags = RUBY_TYPED_FREE_IMMEDIATELY,
         | 
| 37 86 | 
             
            };
         | 
| 38 87 |  | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 88 | 
            +
            static inline VALUE
         | 
| 89 | 
            +
            cs_alloc_len(VALUE klass, struct cs_data **data_ptr, cs_cp len)
         | 
| 90 | 
            +
            {
         | 
| 91 | 
            +
              VALUE cs;
         | 
| 92 | 
            +
              struct cs_data *data;
         | 
| 93 | 
            +
              cs = TypedData_Make_Struct(klass, struct cs_data, &cs_type, data);
         | 
| 94 | 
            +
              data->cps = ruby_xmalloc(CS_MSIZE(len));
         | 
| 95 | 
            +
              memset(data->cps, 0, CS_MSIZE(len));
         | 
| 96 | 
            +
              data->len = len;
         | 
| 97 | 
            +
             | 
| 98 | 
            +
              if (data_ptr)
         | 
| 99 | 
            +
              {
         | 
| 100 | 
            +
                *data_ptr = data;
         | 
| 101 | 
            +
              }
         | 
| 41 102 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 103 | 
            +
              return cs;
         | 
| 104 | 
            +
            }
         | 
| 44 105 |  | 
| 45 | 
            -
            static VALUE
         | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
               | 
| 49 | 
            -
              return NEW_CHARACTER_SET(self, cp_arr);
         | 
| 106 | 
            +
            static inline VALUE
         | 
| 107 | 
            +
            cs_alloc(VALUE klass, struct cs_data **data_ptr)
         | 
| 108 | 
            +
            {
         | 
| 109 | 
            +
              return cs_alloc_len(klass, data_ptr, CS_DEFAULT_INITIAL_LEN);
         | 
| 50 110 | 
             
            }
         | 
| 51 111 |  | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
               | 
| 56 | 
            -
               | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 112 | 
            +
            static inline struct cs_data *
         | 
| 113 | 
            +
            cs_fetch_data(VALUE cs)
         | 
| 114 | 
            +
            {
         | 
| 115 | 
            +
              struct cs_data *data;
         | 
| 116 | 
            +
              TypedData_Get_Struct(cs, struct cs_data, &cs_type, data);
         | 
| 117 | 
            +
              return data;
         | 
| 118 | 
            +
            }
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            static inline cs_ar *
         | 
| 121 | 
            +
            cs_fetch_cps(VALUE cs, cs_cp *len_ptr)
         | 
| 122 | 
            +
            {
         | 
| 123 | 
            +
              struct cs_data *data;
         | 
| 124 | 
            +
              data = cs_fetch_data(cs);
         | 
| 125 | 
            +
              *len_ptr = data->len;
         | 
| 126 | 
            +
              return data->cps;
         | 
| 127 | 
            +
            }
         | 
| 128 | 
            +
             | 
| 129 | 
            +
            static VALUE
         | 
| 130 | 
            +
            cs_method_allocate(VALUE self)
         | 
| 131 | 
            +
            {
         | 
| 132 | 
            +
              return cs_alloc(self, 0);
         | 
| 133 | 
            +
            }
         | 
| 134 | 
            +
             | 
| 135 | 
            +
            #define FOR_EACH_ACTIVE_CODEPOINT(action) \
         | 
| 136 | 
            +
              do                                      \
         | 
| 137 | 
            +
              {                                       \
         | 
| 138 | 
            +
                cs_cp cp, len;                        \
         | 
| 139 | 
            +
                cs_ar *cps;                           \
         | 
| 140 | 
            +
                cps = cs_fetch_cps(self, &len);       \
         | 
| 141 | 
            +
                for (cp = 0; cp < len; cp++)          \
         | 
| 142 | 
            +
                {                                     \
         | 
| 143 | 
            +
                  if (tst_cp(cps, len, cp))           \
         | 
| 144 | 
            +
                  {                                   \
         | 
| 145 | 
            +
                    action;                           \
         | 
| 146 | 
            +
                  }                                   \
         | 
| 147 | 
            +
                }                                     \
         | 
| 148 | 
            +
              } while (0)
         | 
| 59 149 |  | 
| 60 150 | 
             
            // ***************************
         | 
| 61 151 | 
             
            // `Set` compatibility methods
         | 
| 62 152 | 
             
            // ***************************
         | 
| 63 153 |  | 
| 64 | 
            -
            static inline  | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 154 | 
            +
            static inline cs_cp
         | 
| 155 | 
            +
            cs_active_cp_count(VALUE self)
         | 
| 156 | 
            +
            {
         | 
| 157 | 
            +
              cs_cp count;
         | 
| 67 158 | 
             
              count = 0;
         | 
| 68 159 | 
             
              FOR_EACH_ACTIVE_CODEPOINT(count++);
         | 
| 69 | 
            -
              return  | 
| 160 | 
            +
              return count;
         | 
| 70 161 | 
             
            }
         | 
| 71 162 |  | 
| 72 163 | 
             
            static VALUE
         | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 164 | 
            +
            cs_method_length(VALUE self)
         | 
| 165 | 
            +
            {
         | 
| 166 | 
            +
              return LONG2FIX(cs_active_cp_count(self));
         | 
| 167 | 
            +
            }
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            static inline VALUE
         | 
| 170 | 
            +
            cs_enumerator_length(VALUE self, VALUE args, VALUE eobj)
         | 
| 171 | 
            +
            {
         | 
| 172 | 
            +
              return LONG2FIX(cs_active_cp_count(self));
         | 
| 75 173 | 
             
            }
         | 
| 76 174 |  | 
| 77 175 | 
             
            static VALUE
         | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 176 | 
            +
            cs_method_each(VALUE self)
         | 
| 177 | 
            +
            {
         | 
| 178 | 
            +
              RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
         | 
| 80 179 | 
             
              FOR_EACH_ACTIVE_CODEPOINT(rb_yield(LONG2FIX(cp)));
         | 
| 81 180 | 
             
              return self;
         | 
| 82 181 | 
             
            }
         | 
| @@ -84,16 +183,19 @@ method_each(VALUE self) { | |
| 84 183 | 
             
            // returns an Array of codepoint Integers by default.
         | 
| 85 184 | 
             
            // returns an Array of Strings of length 1 if passed `true`.
         | 
| 86 185 | 
             
            static VALUE
         | 
| 87 | 
            -
             | 
| 186 | 
            +
            cs_method_to_a(int argc, VALUE *argv, VALUE self)
         | 
| 187 | 
            +
            {
         | 
| 88 188 | 
             
              VALUE arr;
         | 
| 89 189 | 
             
              rb_encoding *enc;
         | 
| 90 190 | 
             
              rb_check_arity(argc, 0, 1);
         | 
| 91 191 |  | 
| 92 192 | 
             
              arr = rb_ary_new();
         | 
| 93 | 
            -
              if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse) | 
| 193 | 
            +
              if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
         | 
| 194 | 
            +
              {
         | 
| 94 195 | 
             
                FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, LONG2FIX(cp)));
         | 
| 95 196 | 
             
              }
         | 
| 96 | 
            -
              else | 
| 197 | 
            +
              else
         | 
| 198 | 
            +
              {
         | 
| 97 199 | 
             
                enc = rb_utf8_encoding();
         | 
| 98 200 | 
             
                FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, rb_enc_uint_chr((int)cp, enc)));
         | 
| 99 201 | 
             
              }
         | 
| @@ -102,302 +204,473 @@ method_to_a(int argc, VALUE *argv, VALUE self) { | |
| 102 204 | 
             
            }
         | 
| 103 205 |  | 
| 104 206 | 
             
            static VALUE
         | 
| 105 | 
            -
             | 
| 207 | 
            +
            cs_method_empty_p(VALUE self)
         | 
| 208 | 
            +
            {
         | 
| 106 209 | 
             
              FOR_EACH_ACTIVE_CODEPOINT(return Qfalse);
         | 
| 107 210 | 
             
              return Qtrue;
         | 
| 108 211 | 
             
            }
         | 
| 109 212 |  | 
| 110 213 | 
             
            static VALUE
         | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
               | 
| 114 | 
            -
               | 
| 214 | 
            +
            cs_method_hash(VALUE self)
         | 
| 215 | 
            +
            {
         | 
| 216 | 
            +
              cs_cp cp, len, hash, four_byte_value;
         | 
| 217 | 
            +
              cs_ar *cps;
         | 
| 218 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 219 | 
            +
              four_byte_value = 0;
         | 
| 115 220 |  | 
| 116 221 | 
             
              hash = 17;
         | 
| 117 | 
            -
              for (cp = 0; cp <  | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 222 | 
            +
              for (cp = 0; cp < len; cp++)
         | 
| 223 | 
            +
              {
         | 
| 224 | 
            +
                if (cp % 32 == 0)
         | 
| 225 | 
            +
                {
         | 
| 226 | 
            +
                  if (cp != 0)
         | 
| 227 | 
            +
                  {
         | 
| 228 | 
            +
                    hash = hash * 23 + four_byte_value;
         | 
| 229 | 
            +
                  }
         | 
| 120 230 | 
             
                  four_byte_value = 0;
         | 
| 121 231 | 
             
                }
         | 
| 122 | 
            -
                if ( | 
| 232 | 
            +
                if (tst_cp(cps, len, cp))
         | 
| 233 | 
            +
                {
         | 
| 234 | 
            +
                  four_byte_value++;
         | 
| 235 | 
            +
                }
         | 
| 123 236 | 
             
              }
         | 
| 124 237 |  | 
| 125 238 | 
             
              return LONG2FIX(hash);
         | 
| 126 239 | 
             
            }
         | 
| 127 240 |  | 
| 128 241 | 
             
            static inline VALUE
         | 
| 129 | 
            -
             | 
| 242 | 
            +
            cs_delete_if_block_result(VALUE self, int truthy)
         | 
| 243 | 
            +
            {
         | 
| 130 244 | 
             
              VALUE result;
         | 
| 131 245 | 
             
              rb_need_block();
         | 
| 132 246 | 
             
              rb_check_frozen(self);
         | 
| 133 247 | 
             
              FOR_EACH_ACTIVE_CODEPOINT(
         | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
              );
         | 
| 248 | 
            +
                  result = rb_yield(LONG2FIX(cp));
         | 
| 249 | 
            +
                  if ((NIL_P(result) || result == Qfalse) != truthy) clr_cp(cps, len, cp););
         | 
| 137 250 | 
             
              return self;
         | 
| 138 251 | 
             
            }
         | 
| 139 252 |  | 
| 140 253 | 
             
            static VALUE
         | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
               | 
| 254 | 
            +
            cs_method_delete_if(VALUE self)
         | 
| 255 | 
            +
            {
         | 
| 256 | 
            +
              RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
         | 
| 257 | 
            +
              return cs_delete_if_block_result(self, 1);
         | 
| 144 258 | 
             
            }
         | 
| 145 259 |  | 
| 146 260 | 
             
            static VALUE
         | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
| 149 | 
            -
               | 
| 261 | 
            +
            cs_method_keep_if(VALUE self)
         | 
| 262 | 
            +
            {
         | 
| 263 | 
            +
              RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
         | 
| 264 | 
            +
              return cs_delete_if_block_result(self, 0);
         | 
| 150 265 | 
             
            }
         | 
| 151 266 |  | 
| 152 267 | 
             
            static VALUE
         | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
               | 
| 268 | 
            +
            cs_method_clear(VALUE self)
         | 
| 269 | 
            +
            {
         | 
| 270 | 
            +
              struct cs_data *data;
         | 
| 156 271 | 
             
              rb_check_frozen(self);
         | 
| 157 | 
            -
               | 
| 158 | 
            -
               | 
| 159 | 
            -
                CLRBIT(cps, cp);
         | 
| 160 | 
            -
              }
         | 
| 272 | 
            +
              data = cs_fetch_data(self);
         | 
| 273 | 
            +
              memset(data->cps, 0, CS_MSIZE(data->len));
         | 
| 161 274 | 
             
              return self;
         | 
| 162 275 | 
             
            }
         | 
| 163 276 |  | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
             | 
| 167 | 
            -
               | 
| 168 | 
            -
               | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 277 | 
            +
            static VALUE
         | 
| 278 | 
            +
            cs_method_min(VALUE self)
         | 
| 279 | 
            +
            {
         | 
| 280 | 
            +
              FOR_EACH_ACTIVE_CODEPOINT(return LONG2FIX(cp));
         | 
| 281 | 
            +
              return Qnil;
         | 
| 282 | 
            +
            }
         | 
| 283 | 
            +
             | 
| 284 | 
            +
            static VALUE
         | 
| 285 | 
            +
            cs_method_max(VALUE self)
         | 
| 286 | 
            +
            {
         | 
| 287 | 
            +
              cs_cp len;
         | 
| 288 | 
            +
              long reverse_idx;
         | 
| 289 | 
            +
              cs_ar *cps;
         | 
| 290 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 291 | 
            +
              for (reverse_idx = len; reverse_idx >= 0; reverse_idx--)
         | 
| 292 | 
            +
              {
         | 
| 293 | 
            +
                if (tst_cp(cps, len, reverse_idx))
         | 
| 294 | 
            +
                {
         | 
| 295 | 
            +
                  return LONG2FIX(reverse_idx);
         | 
| 296 | 
            +
                }
         | 
| 297 | 
            +
              }
         | 
| 298 | 
            +
              return Qnil;
         | 
| 299 | 
            +
            }
         | 
| 300 | 
            +
             | 
| 301 | 
            +
            static VALUE
         | 
| 302 | 
            +
            cs_method_minmax(VALUE self)
         | 
| 303 | 
            +
            {
         | 
| 304 | 
            +
              VALUE arr;
         | 
| 305 | 
            +
              arr = rb_ary_new2(2);
         | 
| 306 | 
            +
              rb_ary_push(arr, cs_method_min(self));
         | 
| 307 | 
            +
              rb_ary_push(arr, cs_method_max(self));
         | 
| 308 | 
            +
              return arr;
         | 
| 309 | 
            +
            }
         | 
| 310 | 
            +
             | 
| 311 | 
            +
            /*
             * Allocates a new set, fills it with every codepoint cp for which
             * `tst_cp(cs_a, cp) comp_op tst_cp(cs_b, cp)` is non-zero, and returns it
             * from the enclosing function.
             *
             * The result is allocated with the class of `self`, which is read from the
             * expansion site (not from cs_a), so the macro can only be used inside a
             * function that has a `self` in scope.
             */
            #define RETURN_COMBINED_CS(cs_a, cs_b, comp_op)                            \
              do                                                                       \
              {                                                                        \
                struct cs_data *combined_data;                                         \
                cs_ar *a_bits, *b_bits;                                                \
                cs_cp a_len, b_len, cur;                                               \
                VALUE combined;                                                        \
                combined = cs_alloc(RBASIC(self)->klass, &combined_data);              \
                a_bits = cs_fetch_cps(cs_a, &a_len);                                   \
                b_bits = cs_fetch_cps(cs_b, &b_len);                                   \
                cur = 0;                                                               \
                while (cur < UNICODE_CP_COUNT)                                         \
                {                                                                      \
                  if (tst_cp(a_bits, a_len, cur) comp_op tst_cp(b_bits, b_len, cur))   \
                  {                                                                    \
                    set_cp(combined_data, cur);                                        \
                  }                                                                    \
                  cur++;                                                               \
                }                                                                      \
                return combined;                                                       \
              } while (0)
         | 
| 174 330 |  | 
| 175 331 | 
             
            static VALUE
         | 
| 176 | 
            -
             | 
| 177 | 
            -
             | 
| 332 | 
            +
            cs_method_intersection(VALUE self, VALUE other)
         | 
| 333 | 
            +
            {
         | 
| 334 | 
            +
              RETURN_COMBINED_CS(self, other, &&);
         | 
| 178 335 | 
             
            }
         | 
| 179 336 |  | 
| 180 337 | 
             
            static VALUE
         | 
| 181 | 
            -
             | 
| 182 | 
            -
             | 
| 338 | 
            +
            cs_method_exclusion(VALUE self, VALUE other)
         | 
| 339 | 
            +
            {
         | 
| 340 | 
            +
              RETURN_COMBINED_CS(self, other, ^);
         | 
| 183 341 | 
             
            }
         | 
| 184 342 |  | 
| 185 343 | 
             
            static VALUE
         | 
| 186 | 
            -
             | 
| 187 | 
            -
             | 
| 344 | 
            +
            cs_method_union(VALUE self, VALUE other)
         | 
| 345 | 
            +
            {
         | 
| 346 | 
            +
              RETURN_COMBINED_CS(self, other, ||);
         | 
| 188 347 | 
             
            }
         | 
| 189 348 |  | 
| 190 349 | 
             
            static VALUE
         | 
| 191 | 
            -
             | 
| 192 | 
            -
             | 
| 350 | 
            +
            cs_method_difference(VALUE self, VALUE other)
         | 
| 351 | 
            +
            {
         | 
| 352 | 
            +
              RETURN_COMBINED_CS(self, other, >);
         | 
| 193 353 | 
             
            }
         | 
| 194 354 |  | 
| 195 355 | 
             
            static VALUE
         | 
| 196 | 
            -
             | 
| 197 | 
            -
             | 
| 198 | 
            -
               | 
| 199 | 
            -
               | 
| 356 | 
            +
            cs_method_include_p(VALUE self, VALUE num)
         | 
| 357 | 
            +
            {
         | 
| 358 | 
            +
              cs_ar *cps;
         | 
| 359 | 
            +
              cs_cp len;
         | 
| 360 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 361 | 
            +
              return (tst_cp(cps, len, FIX2ULONG(num)) ? Qtrue : Qfalse);
         | 
| 200 362 | 
             
            }
         | 
| 201 363 |  | 
| 202 | 
            -
            static inline  | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
               | 
| 206 | 
            -
               | 
| 207 | 
            -
               | 
| 364 | 
            +
            static inline VALUE
         | 
| 365 | 
            +
            cs_toggle_codepoint(VALUE cs, VALUE cp_num, int on, int return_nil_if_noop)
         | 
| 366 | 
            +
            {
         | 
| 367 | 
            +
              cs_cp cp, len;
         | 
| 368 | 
            +
              cs_ar *cps;
         | 
| 369 | 
            +
              struct cs_data *data;
         | 
| 370 | 
            +
              rb_check_frozen(cs);
         | 
| 371 | 
            +
              data = cs_fetch_data(cs);
         | 
| 372 | 
            +
              cps = data->cps;
         | 
| 373 | 
            +
              len = data->len;
         | 
| 208 374 | 
             
              cp = FIX2ULONG(cp_num);
         | 
| 209 | 
            -
              if ( | 
| 210 | 
            -
             | 
| 375 | 
            +
              if (return_nil_if_noop && (!tst_cp(cps, len, cp) == !on))
         | 
| 376 | 
            +
              {
         | 
| 377 | 
            +
                return Qnil;
         | 
| 211 378 | 
             
              }
         | 
| 212 | 
            -
              else | 
| 213 | 
            -
             | 
| 214 | 
            -
                 | 
| 215 | 
            -
                 | 
| 379 | 
            +
              else
         | 
| 380 | 
            +
              {
         | 
| 381 | 
            +
                if (on)
         | 
| 382 | 
            +
                {
         | 
| 383 | 
            +
                  set_cp(data, cp);
         | 
| 384 | 
            +
                }
         | 
| 385 | 
            +
                else
         | 
| 386 | 
            +
                {
         | 
| 387 | 
            +
                  clr_cp(cps, len, cp);
         | 
| 388 | 
            +
                }
         | 
| 389 | 
            +
                return cs;
         | 
| 216 390 | 
             
              }
         | 
| 217 391 | 
             
            }
         | 
| 218 392 |  | 
| 219 393 | 
             
            static VALUE
         | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 394 | 
            +
            cs_method_add(VALUE self, VALUE cp_num)
         | 
| 395 | 
            +
            {
         | 
| 396 | 
            +
              return cs_toggle_codepoint(self, cp_num, 1, 0);
         | 
| 222 397 | 
             
            }
         | 
| 223 398 |  | 
| 224 399 | 
             
            static VALUE
         | 
| 225 | 
            -
             | 
| 226 | 
            -
             | 
| 400 | 
            +
            cs_method_add_p(VALUE self, VALUE cp_num)
         | 
| 401 | 
            +
            {
         | 
| 402 | 
            +
              return cs_toggle_codepoint(self, cp_num, 1, 1);
         | 
| 227 403 | 
             
            }
         | 
| 228 404 |  | 
| 229 405 | 
             
            static VALUE
         | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 406 | 
            +
            cs_method_delete(VALUE self, VALUE cp_num)
         | 
| 407 | 
            +
            {
         | 
| 408 | 
            +
              return cs_toggle_codepoint(self, cp_num, 0, 0);
         | 
| 232 409 | 
             
            }
         | 
| 233 410 |  | 
| 234 411 | 
             
            static VALUE
         | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 412 | 
            +
            cs_method_delete_p(VALUE self, VALUE cp_num)
         | 
| 413 | 
            +
            {
         | 
| 414 | 
            +
              return cs_toggle_codepoint(self, cp_num, 0, 1);
         | 
| 237 415 | 
             
            }
         | 
| 238 416 |  | 
| 239 | 
            -
            #define COMPARE_SETS(action)\
         | 
| 240 | 
            -
              cp_index cp;\
         | 
| 241 | 
            -
              cp_byte *cps, *other_cps;\
         | 
| 242 | 
            -
              FETCH_CODEPOINTS(self, cps);\
         | 
| 243 | 
            -
              FETCH_CODEPOINTS(other, other_cps);\
         | 
| 244 | 
            -
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++) { action; }\
         | 
| 245 | 
            -
             | 
| 246 417 | 
             
            static VALUE
         | 
| 247 | 
            -
             | 
| 248 | 
            -
             | 
| 418 | 
            +
            cs_method_intersect_p(VALUE self, VALUE other)
         | 
| 419 | 
            +
            {
         | 
| 420 | 
            +
              cs_cp cp, alen, blen;
         | 
| 421 | 
            +
              cs_ar *acps, *bcps;
         | 
| 422 | 
            +
              acps = cs_fetch_cps(self, &alen);
         | 
| 423 | 
            +
              bcps = cs_fetch_cps(other, &blen);
         | 
| 424 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 425 | 
            +
              {
         | 
| 426 | 
            +
                if (tst_cp(acps, alen, cp) && tst_cp(bcps, blen, cp))
         | 
| 427 | 
            +
                {
         | 
| 428 | 
            +
                  return Qtrue;
         | 
| 429 | 
            +
                }
         | 
| 430 | 
            +
              }
         | 
| 249 431 | 
             
              return Qfalse;
         | 
| 250 432 | 
             
            }
         | 
| 251 433 |  | 
| 252 434 | 
             
            static VALUE
         | 
| 253 | 
            -
             | 
| 254 | 
            -
             | 
| 435 | 
            +
            cs_method_disjoint_p(VALUE self, VALUE other)
         | 
| 436 | 
            +
            {
         | 
| 437 | 
            +
              return cs_method_intersect_p(self, other) ? Qfalse : Qtrue;
         | 
| 255 438 | 
             
            }
         | 
| 256 439 |  | 
| 257 440 | 
             
            static inline int
         | 
| 258 | 
            -
             | 
| 259 | 
            -
             | 
| 441 | 
            +
            cs_check_type(VALUE obj)
         | 
| 442 | 
            +
            {
         | 
| 443 | 
            +
              return rb_typeddata_is_kind_of(obj, &cs_type);
         | 
| 260 444 | 
             
            }
         | 
| 261 445 |  | 
| 262 446 | 
             
            static VALUE
         | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
               | 
| 266 | 
            -
             | 
| 267 | 
            -
               | 
| 268 | 
            -
             | 
| 447 | 
            +
            cs_cps_eql(VALUE cs_a, VALUE cs_b)
         | 
| 448 | 
            +
            {
         | 
| 449 | 
            +
              cs_cp cp, alen, blen;
         | 
| 450 | 
            +
              cs_ar *acps, *bcps;
         | 
| 451 | 
            +
              acps = cs_fetch_cps(cs_a, &alen);
         | 
| 452 | 
            +
              bcps = cs_fetch_cps(cs_b, &blen);
         | 
| 453 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 454 | 
            +
              {
         | 
| 455 | 
            +
                if (tst_cp(acps, alen, cp) != tst_cp(bcps, blen, cp))
         | 
| 456 | 
            +
                {
         | 
| 457 | 
            +
                  return Qfalse;
         | 
| 458 | 
            +
                }
         | 
| 459 | 
            +
              }
         | 
| 269 460 | 
             
              return Qtrue;
         | 
| 270 461 | 
             
            }
         | 
| 271 462 |  | 
| 463 | 
            +
            static VALUE
         | 
| 464 | 
            +
            cs_method_eql_p(VALUE self, VALUE other)
         | 
| 465 | 
            +
            {
         | 
| 466 | 
            +
              if (!cs_check_type(other))
         | 
| 467 | 
            +
              {
         | 
| 468 | 
            +
                return Qfalse;
         | 
| 469 | 
            +
              }
         | 
| 470 | 
            +
              if (self == other) // same object_id
         | 
| 471 | 
            +
              {
         | 
| 472 | 
            +
                return Qtrue;
         | 
| 473 | 
            +
              }
         | 
| 474 | 
            +
              return cs_cps_eql(self, other);
         | 
| 475 | 
            +
            }
         | 
| 476 | 
            +
             | 
| 272 477 | 
             
            static inline VALUE
         | 
| 273 | 
            -
             | 
| 274 | 
            -
             | 
| 275 | 
            -
               | 
| 478 | 
            +
            cs_merge_cs(VALUE recipient, VALUE source)
         | 
| 479 | 
            +
            {
         | 
| 480 | 
            +
              cs_cp cp, source_len;
         | 
| 481 | 
            +
              struct cs_data *data;
         | 
| 482 | 
            +
              cs_ar *source_cps;
         | 
| 483 | 
            +
              data = cs_fetch_data(recipient);
         | 
| 484 | 
            +
              source_cps = cs_fetch_cps(source, &source_len);
         | 
| 485 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 486 | 
            +
              {
         | 
| 487 | 
            +
                if (tst_cp(source_cps, source_len, cp))
         | 
| 488 | 
            +
                {
         | 
| 489 | 
            +
                  set_cp(data, cp);
         | 
| 490 | 
            +
                }
         | 
| 491 | 
            +
              }
         | 
| 492 | 
            +
              return recipient;
         | 
| 276 493 | 
             
            }
         | 
| 277 494 |  | 
| 278 | 
            -
            static inline  | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 495 | 
            +
            static inline cs_cp
         | 
| 496 | 
            +
            cs_checked_cp(VALUE object_id)
         | 
| 497 | 
            +
            {
         | 
| 498 | 
            +
              if (FIXNUM_P(object_id) && object_id > 0 && object_id < 0x220001)
         | 
| 499 | 
            +
              {
         | 
| 500 | 
            +
                return FIX2ULONG(object_id);
         | 
| 501 | 
            +
              }
         | 
| 281 502 | 
             
              rb_raise(rb_eArgError, "CharacterSet members must be between 0 and 0x10FFFF");
         | 
| 282 503 | 
             
            }
         | 
| 283 504 |  | 
| 284 505 | 
             
            static inline VALUE
         | 
| 285 | 
            -
             | 
| 506 | 
            +
            cs_merge_rb_range(VALUE self, VALUE rb_range)
         | 
| 507 | 
            +
            {
         | 
| 286 508 | 
             
              VALUE from_id, upto_id;
         | 
| 509 | 
            +
              cs_cp from_cp, upto_cp, cont_len, rem;
         | 
| 287 510 | 
             
              int excl;
         | 
| 288 | 
            -
               | 
| 289 | 
            -
               | 
| 290 | 
            -
              FETCH_CODEPOINTS(self, cps);
         | 
| 511 | 
            +
              struct cs_data *data;
         | 
| 512 | 
            +
              data = cs_fetch_data(self);
         | 
| 291 513 |  | 
| 292 | 
            -
              if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl))) | 
| 514 | 
            +
              if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
         | 
| 515 | 
            +
              {
         | 
| 293 516 | 
             
                rb_raise(rb_eArgError, "pass a Range");
         | 
| 294 517 | 
             
              }
         | 
| 295 | 
            -
              if (excl) | 
| 518 | 
            +
              if (excl)
         | 
| 519 | 
            +
              {
         | 
| 520 | 
            +
                upto_id -= 2;
         | 
| 521 | 
            +
              }
         | 
| 522 | 
            +
             | 
| 523 | 
            +
              from_cp = cs_checked_cp(from_id);
         | 
| 524 | 
            +
              upto_cp = cs_checked_cp(upto_id);
         | 
| 296 525 |  | 
| 297 | 
            -
               | 
| 298 | 
            -
               | 
| 526 | 
            +
              if (upto_cp > from_cp && (upto_cp - from_cp > 6))
         | 
| 527 | 
            +
              {
         | 
| 528 | 
            +
                // set bits in preceding partially toggled bytes individually
         | 
| 529 | 
            +
                for (/* */; (from_cp <= upto_cp) && (from_cp % 8); from_cp++)
         | 
| 530 | 
            +
                {
         | 
| 531 | 
            +
                  set_cp(data, from_cp);
         | 
| 532 | 
            +
                }
         | 
| 533 | 
            +
                // memset contiguous bits directly
         | 
| 534 | 
            +
                cont_len = upto_cp - from_cp + 1;
         | 
| 535 | 
            +
                rem = cont_len % 8;
         | 
| 536 | 
            +
                ensure_memsize_fits(data, upto_cp);
         | 
| 537 | 
            +
                memset(data->cps + CS_MSIZE(from_cp), 0xFF, CS_MSIZE(cont_len - rem) / 8);
         | 
| 538 | 
            +
                from_cp = upto_cp - rem + 1;
         | 
| 539 | 
            +
              }
         | 
| 299 540 |  | 
| 300 | 
            -
               | 
| 301 | 
            -
             | 
| 302 | 
            -
             | 
| 541 | 
            +
              // set bits in partially toggled bytes individually
         | 
| 542 | 
            +
              for (/* */; from_cp <= upto_cp; from_cp++)
         | 
| 543 | 
            +
              {
         | 
| 544 | 
            +
                set_cp(data, from_cp);
         | 
| 303 545 | 
             
              }
         | 
| 546 | 
            +
             | 
| 304 547 | 
             
              return self;
         | 
| 305 548 | 
             
            }
         | 
| 306 549 |  | 
| 307 550 | 
             
            static inline VALUE
         | 
| 308 | 
            -
             | 
| 309 | 
            -
             | 
| 310 | 
            -
               | 
| 311 | 
            -
               | 
| 312 | 
            -
              FETCH_CODEPOINTS(self, cps);
         | 
| 551 | 
            +
            cs_merge_rb_array(VALUE self, VALUE rb_array)
         | 
| 552 | 
            +
            {
         | 
| 553 | 
            +
              VALUE el, array_length, i;
         | 
| 554 | 
            +
              struct cs_data *data;
         | 
| 313 555 | 
             
              Check_Type(rb_array, T_ARRAY);
         | 
| 556 | 
            +
              data = cs_fetch_data(self);
         | 
| 314 557 | 
             
              array_length = RARRAY_LEN(rb_array);
         | 
| 315 | 
            -
              for (i = 0; i < array_length; i++) | 
| 558 | 
            +
              for (i = 0; i < array_length; i++)
         | 
| 559 | 
            +
              {
         | 
| 316 560 | 
             
                el = RARRAY_AREF(rb_array, i);
         | 
| 317 | 
            -
                 | 
| 318 | 
            -
                SETBIT(cps, FIX2ULONG(el));
         | 
| 561 | 
            +
                set_cp(data, cs_checked_cp(el));
         | 
| 319 562 | 
             
              }
         | 
| 320 563 | 
             
              return self;
         | 
| 321 564 | 
             
            }
         | 
| 322 565 |  | 
| 323 566 | 
             
            static VALUE
         | 
| 324 | 
            -
             | 
| 567 | 
            +
            cs_method_merge(VALUE self, VALUE other)
         | 
| 568 | 
            +
            {
         | 
| 325 569 | 
             
              rb_check_frozen(self);
         | 
| 326 | 
            -
              if ( | 
| 327 | 
            -
             | 
| 570 | 
            +
              if (cs_check_type(other))
         | 
| 571 | 
            +
              {
         | 
| 572 | 
            +
                return cs_merge_cs(self, other);
         | 
| 328 573 | 
             
              }
         | 
| 329 | 
            -
              else if (TYPE(other) == T_ARRAY) | 
| 330 | 
            -
             | 
| 574 | 
            +
              else if (TYPE(other) == T_ARRAY)
         | 
| 575 | 
            +
              {
         | 
| 576 | 
            +
                return cs_merge_rb_array(self, other);
         | 
| 331 577 | 
             
              }
         | 
| 332 | 
            -
              return  | 
| 578 | 
            +
              return cs_merge_rb_range(self, other);
         | 
| 333 579 | 
             
            }
         | 
| 334 580 |  | 
| 335 581 | 
             
            static VALUE
         | 
| 336 | 
            -
             | 
| 337 | 
            -
             | 
| 338 | 
            -
               | 
| 582 | 
            +
            cs_method_initialize_copy(VALUE self, VALUE orig)
         | 
| 583 | 
            +
            {
         | 
| 584 | 
            +
              cs_merge_cs(self, orig);
         | 
| 585 | 
            +
              return self;
         | 
| 339 586 | 
             
            }
         | 
| 340 587 |  | 
| 341 588 | 
             
            static VALUE
         | 
| 342 | 
            -
             | 
| 589 | 
            +
            cs_method_subtract(VALUE self, VALUE other)
         | 
| 590 | 
            +
            {
         | 
| 591 | 
            +
              cs_cp cp, len, other_len;
         | 
| 592 | 
            +
              cs_ar *cps, *other_cps;
         | 
| 343 593 | 
             
              rb_check_frozen(self);
         | 
| 344 | 
            -
               | 
| 594 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 595 | 
            +
              other_cps = cs_fetch_cps(other, &other_len);
         | 
| 596 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 597 | 
            +
              {
         | 
| 598 | 
            +
                if (tst_cp(other_cps, other_len, cp))
         | 
| 599 | 
            +
                {
         | 
| 600 | 
            +
                  clr_cp(cps, len, cp);
         | 
| 601 | 
            +
                }
         | 
| 602 | 
            +
              }
         | 
| 345 603 | 
             
              return self;
         | 
| 346 604 | 
             
            }
         | 
| 347 605 |  | 
| 348 606 | 
             
            static inline int
         | 
| 349 | 
            -
             | 
| 350 | 
            -
             | 
| 351 | 
            -
               | 
| 607 | 
            +
            cs_a_subset_of_b(VALUE cs_a, VALUE cs_b, int *is_proper_ptr)
         | 
| 608 | 
            +
            {
         | 
| 609 | 
            +
              cs_ar *a, *b;
         | 
| 610 | 
            +
              cs_cp cp, alen, blen, count_a, count_b;
         | 
| 352 611 |  | 
| 353 | 
            -
              if (! | 
| 612 | 
            +
              if (!cs_check_type(cs_a) || !cs_check_type(cs_b))
         | 
| 613 | 
            +
              {
         | 
| 354 614 | 
             
                rb_raise(rb_eArgError, "pass a CharacterSet");
         | 
| 355 615 | 
             
              }
         | 
| 356 616 |  | 
| 357 | 
            -
               | 
| 358 | 
            -
               | 
| 359 | 
            -
             | 
| 360 | 
            -
               | 
| 361 | 
            -
               | 
| 362 | 
            -
             | 
| 363 | 
            -
             | 
| 364 | 
            -
               | 
| 365 | 
            -
                if ( | 
| 366 | 
            -
             | 
| 367 | 
            -
                   | 
| 368 | 
            -
                   | 
| 617 | 
            +
              a = cs_fetch_cps(cs_a, &alen);
         | 
| 618 | 
            +
              b = cs_fetch_cps(cs_b, &blen);
         | 
| 619 | 
            +
             | 
| 620 | 
            +
              count_a = 0;
         | 
| 621 | 
            +
              count_b = 0;
         | 
| 622 | 
            +
             | 
| 623 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 624 | 
            +
              {
         | 
| 625 | 
            +
                if (tst_cp(a, alen, cp))
         | 
| 626 | 
            +
                {
         | 
| 627 | 
            +
                  if (!tst_cp(b, blen, cp))
         | 
| 628 | 
            +
                  {
         | 
| 629 | 
            +
                    return 0;
         | 
| 630 | 
            +
                  }
         | 
| 631 | 
            +
                  count_a++;
         | 
| 632 | 
            +
                  count_b++;
         | 
| 633 | 
            +
                }
         | 
| 634 | 
            +
                else if (tst_cp(b, blen, cp))
         | 
| 635 | 
            +
                {
         | 
| 636 | 
            +
                  count_b++;
         | 
| 369 637 | 
             
                }
         | 
| 370 | 
            -
                else if (TSTBIT(cps_b, cp)) size_b++;
         | 
| 371 638 | 
             
              }
         | 
| 372 639 |  | 
| 373 | 
            -
              if ( | 
| 640 | 
            +
              if (is_proper_ptr)
         | 
| 641 | 
            +
              {
         | 
| 642 | 
            +
                *is_proper_ptr = count_b > count_a;
         | 
| 643 | 
            +
              }
         | 
| 644 | 
            +
             | 
| 374 645 | 
             
              return 1;
         | 
| 375 646 | 
             
            }
         | 
| 376 647 |  | 
| 377 648 | 
             
            static VALUE
         | 
| 378 | 
            -
             | 
| 379 | 
            -
             | 
| 380 | 
            -
              return  | 
| 649 | 
            +
            cs_method_subset_p(VALUE self, VALUE other)
         | 
| 650 | 
            +
            {
         | 
| 651 | 
            +
              return cs_a_subset_of_b(self, other, NULL) ? Qtrue : Qfalse;
         | 
| 381 652 | 
             
            }
         | 
| 382 653 |  | 
| 383 654 | 
             
            static VALUE
         | 
| 384 | 
            -
             | 
| 385 | 
            -
             | 
| 386 | 
            -
               | 
| 387 | 
            -
               | 
| 655 | 
            +
            cs_method_proper_subset_p(VALUE self, VALUE other)
         | 
| 656 | 
            +
            {
         | 
| 657 | 
            +
              int is_subset, is_proper;
         | 
| 658 | 
            +
              is_subset = cs_a_subset_of_b(self, other, &is_proper);
         | 
| 659 | 
            +
              return (is_subset && is_proper) ? Qtrue : Qfalse;
         | 
| 388 660 | 
             
            }
         | 
| 389 661 |  | 
| 390 662 | 
             
            static VALUE
         | 
| 391 | 
            -
             | 
| 392 | 
            -
             | 
| 393 | 
            -
              return  | 
| 663 | 
            +
            cs_method_superset_p(VALUE self, VALUE other)
         | 
| 664 | 
            +
            {
         | 
| 665 | 
            +
              return cs_a_subset_of_b(other, self, NULL) ? Qtrue : Qfalse;
         | 
| 394 666 | 
             
            }
         | 
| 395 667 |  | 
| 396 668 | 
             
            static VALUE
         | 
| 397 | 
            -
             | 
| 398 | 
            -
             | 
| 399 | 
            -
               | 
| 400 | 
            -
               | 
| 669 | 
            +
            cs_method_proper_superset_p(VALUE self, VALUE other)
         | 
| 670 | 
            +
            {
         | 
| 671 | 
            +
              int is_superset, is_proper;
         | 
| 672 | 
            +
              is_superset = cs_a_subset_of_b(other, self, &is_proper);
         | 
| 673 | 
            +
              return (is_superset && is_proper) ? Qtrue : Qfalse;
         | 
| 401 674 | 
             
            }
         | 
| 402 675 |  | 
| 403 676 | 
             
            // *******************************
         | 
| @@ -405,42 +678,43 @@ method_proper_superset_p(VALUE self, VALUE other) { | |
| 405 678 | 
             
            // *******************************
         | 
| 406 679 |  | 
| 407 680 | 
             
            static VALUE
         | 
| 408 | 
            -
             | 
| 409 | 
            -
             | 
| 410 | 
            -
               | 
| 681 | 
            +
            cs_class_method_from_ranges(VALUE self, VALUE ranges)
         | 
| 682 | 
            +
            {
         | 
| 683 | 
            +
              VALUE new_cs, range_count, i;
         | 
| 684 | 
            +
              new_cs = rb_class_new_instance(0, 0, self);
         | 
| 411 685 | 
             
              range_count = RARRAY_LEN(ranges);
         | 
| 412 | 
            -
              for (i = 0; i < range_count; i++) | 
| 413 | 
            -
             | 
| 686 | 
            +
              for (i = 0; i < range_count; i++)
         | 
| 687 | 
            +
              {
         | 
| 688 | 
            +
                cs_merge_rb_range(new_cs, RARRAY_AREF(ranges, i));
         | 
| 414 689 | 
             
              }
         | 
| 415 | 
            -
              return  | 
| 690 | 
            +
              return new_cs;
         | 
| 416 691 | 
             
            }
         | 
| 417 692 |  | 
| 418 693 | 
             
            static VALUE
         | 
| 419 | 
            -
             | 
| 420 | 
            -
             | 
| 694 | 
            +
            cs_method_ranges(VALUE self)
         | 
| 695 | 
            +
            {
         | 
| 696 | 
            +
              VALUE ranges, cp_num, previous_cp_num, current_start, current_end;
         | 
| 421 697 |  | 
| 422 698 | 
             
              ranges = rb_ary_new();
         | 
| 423 | 
            -
               | 
| 699 | 
            +
              previous_cp_num = 0;
         | 
| 424 700 | 
             
              current_start = 0;
         | 
| 425 701 | 
             
              current_end = 0;
         | 
| 426 702 |  | 
| 427 703 | 
             
              FOR_EACH_ACTIVE_CODEPOINT(
         | 
| 428 | 
            -
             | 
| 704 | 
            +
                  cp_num = LONG2FIX(cp);
         | 
| 429 705 |  | 
| 430 | 
            -
             | 
| 431 | 
            -
             | 
| 432 | 
            -
             | 
| 433 | 
            -
             | 
| 434 | 
            -
             | 
| 435 | 
            -
             | 
| 436 | 
            -
                   | 
| 437 | 
            -
             | 
| 438 | 
            -
                current_end = codepoint;
         | 
| 439 | 
            -
                previous_codepoint = codepoint;
         | 
| 440 | 
            -
              );
         | 
| 706 | 
            +
                  if (!previous_cp_num) {
         | 
| 707 | 
            +
                    current_start = cp_num;
         | 
| 708 | 
            +
                  } else if (previous_cp_num + 2 != cp_num) {
         | 
| 709 | 
            +
                    // gap found, finalize previous range
         | 
| 710 | 
            +
                    rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
         | 
| 711 | 
            +
                    current_start = cp_num;
         | 
| 712 | 
            +
                  } current_end = cp_num;
         | 
| 713 | 
            +
                  previous_cp_num = cp_num;);
         | 
| 441 714 |  | 
| 442 715 | 
             
              // add final range
         | 
| 443 | 
            -
              if (current_start) | 
| 716 | 
            +
              if (current_start)
         | 
| 717 | 
            +
              {
         | 
| 444 718 | 
             
                rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
         | 
| 445 719 | 
             
              }
         | 
| 446 720 |  | 
| @@ -448,117 +722,233 @@ method_ranges(VALUE self) { | |
| 448 722 | 
             
            }
         | 
| 449 723 |  | 
| 450 724 | 
             
            static VALUE
         | 
| 451 | 
            -
             | 
| 452 | 
            -
             | 
| 725 | 
            +
            cs_method_sample(int argc, VALUE *argv, VALUE self)
         | 
| 726 | 
            +
            {
         | 
| 727 | 
            +
              VALUE array, to_a_args[1] = {Qtrue};
         | 
| 453 728 | 
             
              rb_check_arity(argc, 0, 1);
         | 
| 454 | 
            -
               | 
| 455 | 
            -
              array = method_to_a(1, to_a_args, self);
         | 
| 729 | 
            +
              array = cs_method_to_a(1, to_a_args, self);
         | 
| 456 730 | 
             
              return rb_funcall(array, rb_intern("sample"), argc, argc ? argv[0] : 0);
         | 
| 457 731 | 
             
            }
         | 
| 458 732 |  | 
| 459 733 | 
             
            static inline VALUE
         | 
| 460 | 
            -
             | 
| 461 | 
            -
             | 
| 462 | 
            -
               | 
| 463 | 
            -
               | 
| 464 | 
            -
               | 
| 465 | 
            -
               | 
| 466 | 
            -
             | 
| 734 | 
            +
            cs_from_section(VALUE set, cs_cp from, cs_cp upto)
         | 
| 735 | 
            +
            {
         | 
| 736 | 
            +
              VALUE new_cs;
         | 
| 737 | 
            +
              cs_ar *cps;
         | 
| 738 | 
            +
              cs_cp cp, len;
         | 
| 739 | 
            +
              struct cs_data *new_data;
         | 
| 740 | 
            +
              new_cs = cs_alloc(RBASIC(set)->klass, &new_data);
         | 
| 741 | 
            +
              cps = cs_fetch_cps(set, &len);
         | 
| 742 | 
            +
              for (cp = from; cp <= upto; cp++)
         | 
| 743 | 
            +
              {
         | 
| 744 | 
            +
                if (tst_cp(cps, len, cp))
         | 
| 745 | 
            +
                {
         | 
| 746 | 
            +
                  set_cp(new_data, cp);
         | 
| 747 | 
            +
                }
         | 
| 467 748 | 
             
              }
         | 
| 468 | 
            -
              return  | 
| 749 | 
            +
              return new_cs;
         | 
| 469 750 | 
             
            }
         | 
| 470 751 |  | 
| 471 752 | 
             
            static VALUE
         | 
| 472 | 
            -
             | 
| 473 | 
            -
             | 
| 753 | 
            +
            cs_method_ext_section(VALUE self, VALUE from, VALUE upto)
         | 
| 754 | 
            +
            {
         | 
| 755 | 
            +
              return cs_from_section(self, FIX2ULONG(from), FIX2ULONG(upto));
         | 
| 756 | 
            +
            }
         | 
| 757 | 
            +
             | 
| 758 | 
            +
            static inline cs_cp
         | 
| 759 | 
            +
            cs_active_cp_count_in_section(VALUE set, cs_cp from, cs_cp upto)
         | 
| 760 | 
            +
            {
         | 
| 761 | 
            +
              cs_ar *cps;
         | 
| 762 | 
            +
              cs_cp cp, count, len;
         | 
| 763 | 
            +
              cps = cs_fetch_cps(set, &len);
         | 
| 764 | 
            +
              for (count = 0, cp = from; cp <= upto; cp++)
         | 
| 765 | 
            +
              {
         | 
| 766 | 
            +
                if (tst_cp(cps, len, cp))
         | 
| 767 | 
            +
                {
         | 
| 768 | 
            +
                  count++;
         | 
| 769 | 
            +
                }
         | 
| 770 | 
            +
              }
         | 
| 771 | 
            +
              return count;
         | 
| 474 772 | 
             
            }
         | 
| 475 773 |  | 
| 476 774 | 
             
            static VALUE
         | 
| 477 | 
            -
             | 
| 478 | 
            -
             | 
| 775 | 
            +
            cs_method_ext_count_in_section(VALUE self, VALUE from, VALUE upto)
         | 
| 776 | 
            +
            {
         | 
| 777 | 
            +
              cs_cp count;
         | 
| 778 | 
            +
              count = cs_active_cp_count_in_section(self, FIX2ULONG(from), FIX2ULONG(upto));
         | 
| 779 | 
            +
              return LONG2FIX(count);
         | 
| 479 780 | 
             
            }
         | 
| 480 781 |  | 
| 481 782 | 
             
            static inline VALUE
         | 
| 482 | 
            -
             | 
| 483 | 
            -
             | 
| 484 | 
            -
               | 
| 485 | 
            -
               | 
| 486 | 
            -
               | 
| 487 | 
            -
             | 
| 488 | 
            -
             | 
| 489 | 
            -
             | 
| 783 | 
            +
            cs_has_cp_in_section(cs_ar *cps, cs_cp len, cs_cp from, cs_cp upto)
         | 
| 784 | 
            +
            {
         | 
| 785 | 
            +
              cs_cp cp;
         | 
| 786 | 
            +
              for (cp = from; cp <= upto; cp++)
         | 
| 787 | 
            +
              {
         | 
| 788 | 
            +
                if (tst_cp(cps, len, cp))
         | 
| 789 | 
            +
                {
         | 
| 790 | 
            +
                  return Qtrue;
         | 
| 791 | 
            +
                }
         | 
| 490 792 | 
             
              }
         | 
| 491 793 | 
             
              return Qfalse;
         | 
| 492 794 | 
             
            }
         | 
| 493 795 |  | 
| 494 796 | 
             
            static VALUE
         | 
| 495 | 
            -
             | 
| 797 | 
            +
            cs_method_ext_section_p(VALUE self, VALUE from, VALUE upto)
         | 
| 798 | 
            +
            {
         | 
| 799 | 
            +
              cs_ar *cps;
         | 
| 800 | 
            +
              cs_cp len;
         | 
| 801 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 802 | 
            +
              return cs_has_cp_in_section(cps, len, FIX2ULONG(from), FIX2ULONG(upto));
         | 
| 803 | 
            +
            }
         | 
| 804 | 
            +
             | 
| 805 | 
            +
            static inline VALUE
         | 
| 806 | 
            +
            cs_ratio_of_section(VALUE set, cs_cp from, cs_cp upto)
         | 
| 807 | 
            +
            {
         | 
| 808 | 
            +
              double section_count, total_count;
         | 
| 809 | 
            +
              section_count = (double)cs_active_cp_count_in_section(set, from, upto);
         | 
| 810 | 
            +
              total_count = (double)cs_active_cp_count(set);
         | 
| 811 | 
            +
              return DBL2NUM(section_count / total_count);
         | 
| 812 | 
            +
            }
         | 
| 813 | 
            +
             | 
| 814 | 
            +
            static VALUE
         | 
| 815 | 
            +
            cs_method_ext_section_ratio(VALUE self, VALUE from, VALUE upto)
         | 
| 816 | 
            +
            {
         | 
| 817 | 
            +
              return cs_ratio_of_section(self, FIX2ULONG(from), FIX2ULONG(upto));
         | 
| 818 | 
            +
            }
         | 
| 819 | 
            +
             | 
| 820 | 
            +
            #define MAX_CP 0x10FFFF
         | 
| 821 | 
            +
            #define MAX_ASCII_CP 0x7F
         | 
| 822 | 
            +
            #define MAX_BMP_CP 0xFFFF
         | 
| 823 | 
            +
            #define MIN_ASTRAL_CP 0x10000
         | 
| 824 | 
            +
             | 
| 825 | 
            +
            static inline VALUE
         | 
| 826 | 
            +
            cs_has_cp_in_plane(cs_ar *cps, cs_cp len, unsigned int plane)
         | 
| 827 | 
            +
            {
         | 
| 828 | 
            +
              cs_cp plane_beg, plane_end;
         | 
| 829 | 
            +
              plane_beg = plane * UNICODE_PLANE_SIZE;
         | 
| 830 | 
            +
              plane_end = (plane + 1) * MAX_BMP_CP;
         | 
| 831 | 
            +
              return cs_has_cp_in_section(cps, len, plane_beg, plane_end);
         | 
| 832 | 
            +
            }
         | 
| 833 | 
            +
             | 
| 834 | 
            +
            static VALUE
         | 
| 835 | 
            +
            cs_method_planes(VALUE self)
         | 
| 836 | 
            +
            {
         | 
| 837 | 
            +
              cs_ar *cps;
         | 
| 838 | 
            +
              cs_cp len;
         | 
| 496 839 | 
             
              unsigned int i;
         | 
| 497 840 | 
             
              VALUE planes;
         | 
| 841 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 498 842 | 
             
              planes = rb_ary_new();
         | 
| 499 | 
            -
              for (i = 0; i < UNICODE_PLANE_COUNT; i++) | 
| 500 | 
            -
             | 
| 843 | 
            +
              for (i = 0; i < UNICODE_PLANE_COUNT; i++)
         | 
| 844 | 
            +
              {
         | 
| 845 | 
            +
                if (cs_has_cp_in_plane(cps, len, i))
         | 
| 846 | 
            +
                {
         | 
| 847 | 
            +
                  rb_ary_push(planes, INT2FIX(i));
         | 
| 848 | 
            +
                }
         | 
| 501 849 | 
             
              }
         | 
| 502 850 | 
             
              return planes;
         | 
| 503 851 | 
             
            }
         | 
| 504 852 |  | 
| 505 | 
            -
            static  | 
| 506 | 
            -
             | 
| 853 | 
            +
            static inline int
         | 
| 854 | 
            +
            cs_valid_plane_num(VALUE num)
         | 
| 855 | 
            +
            {
         | 
| 507 856 | 
             
              int plane;
         | 
| 508 | 
            -
              Check_Type( | 
| 509 | 
            -
              plane = FIX2INT( | 
| 510 | 
            -
              if (plane < 0 || plane >= UNICODE_PLANE_COUNT) | 
| 511 | 
            -
             | 
| 857 | 
            +
              Check_Type(num, T_FIXNUM);
         | 
| 858 | 
            +
              plane = FIX2INT(num);
         | 
| 859 | 
            +
              if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
         | 
| 860 | 
            +
              {
         | 
| 861 | 
            +
                rb_raise(rb_eArgError, "plane must be between 0 and %d", UNICODE_PLANE_COUNT - 1);
         | 
| 512 862 | 
             
              }
         | 
| 513 | 
            -
              return  | 
| 863 | 
            +
              return plane;
         | 
| 864 | 
            +
            }
         | 
| 865 | 
            +
             | 
| 866 | 
            +
            static VALUE
         | 
| 867 | 
            +
            cs_method_plane(VALUE self, VALUE plane_num)
         | 
| 868 | 
            +
            {
         | 
| 869 | 
            +
              cs_cp plane, plane_beg, plane_end;
         | 
| 870 | 
            +
              plane = cs_valid_plane_num(plane_num);
         | 
| 871 | 
            +
              plane_beg = plane * UNICODE_PLANE_SIZE;
         | 
| 872 | 
            +
              plane_end = (plane + 1) * MAX_BMP_CP;
         | 
| 873 | 
            +
              return cs_from_section(self, plane_beg, plane_end);
         | 
| 874 | 
            +
            }
         | 
| 875 | 
            +
             | 
| 876 | 
            +
            static VALUE
         | 
| 877 | 
            +
            cs_method_member_in_plane_p(VALUE self, VALUE plane_num)
         | 
| 878 | 
            +
            {
         | 
| 879 | 
            +
              cs_ar *cps;
         | 
| 880 | 
            +
              cs_cp len;
         | 
| 881 | 
            +
              unsigned int plane;
         | 
| 882 | 
            +
              plane = cs_valid_plane_num(plane_num);
         | 
| 883 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 884 | 
            +
              return cs_has_cp_in_plane(cps, len, plane);
         | 
| 514 885 | 
             
            }
         | 
| 515 886 |  | 
| 516 887 | 
             
            #define NON_SURROGATE(cp) (cp > 0xDFFF || cp < 0xD800)
         | 
| 517 888 |  | 
| 518 889 | 
             
            static VALUE
         | 
| 519 | 
            -
             | 
| 520 | 
            -
             | 
| 521 | 
            -
               | 
| 522 | 
            -
               | 
| 523 | 
            -
               | 
| 890 | 
            +
            cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
         | 
| 891 | 
            +
            {
         | 
| 892 | 
            +
              int inc_surr;
         | 
| 893 | 
            +
              cs_cp upto, cp, len;
         | 
| 894 | 
            +
              cs_ar *cps;
         | 
| 895 | 
            +
              VALUE new_cs;
         | 
| 896 | 
            +
              struct cs_data *new_data;
         | 
| 897 | 
            +
             | 
| 524 898 | 
             
              rb_check_arity(argc, 0, 2);
         | 
| 525 | 
            -
             | 
| 526 | 
            -
               | 
| 527 | 
            -
             | 
| 528 | 
            -
             | 
| 529 | 
            -
             | 
| 530 | 
            -
             | 
| 899 | 
            +
             | 
| 900 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 901 | 
            +
              inc_surr = argc && argv[0] == Qtrue;
         | 
| 902 | 
            +
              new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
         | 
| 903 | 
            +
              upto = argc > 1 && FIXNUM_P(argv[1]) ? FIX2ULONG(argv[1]) : UNICODE_CP_COUNT;
         | 
| 904 | 
            +
             | 
| 905 | 
            +
              for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
         | 
| 906 | 
            +
              {
         | 
| 907 | 
            +
                if (cp <= upto && !tst_cp(cps, len, cp) && (inc_surr || NON_SURROGATE(cp)))
         | 
| 908 | 
            +
                {
         | 
| 909 | 
            +
                  set_cp(new_data, cp);
         | 
| 910 | 
            +
                }
         | 
| 531 911 | 
             
              }
         | 
| 532 | 
            -
             | 
| 533 | 
            -
             | 
| 534 | 
            -
              );
         | 
| 912 | 
            +
             | 
| 913 | 
            +
              return new_cs;
         | 
| 535 914 | 
             
            }
         | 
| 536 915 |  | 
| 537 | 
            -
            typedef int(*str_cp_handler)(unsigned int,  | 
| 916 | 
            +
            typedef int (*str_cp_handler)(unsigned int, cs_ar *, cs_cp len, struct cs_data *data, VALUE *memo);
         | 
| 538 917 |  | 
| 539 918 | 
             
            static inline int
         | 
| 540 | 
            -
            add_str_cp_to_arr(unsigned int str_cp,  | 
| 541 | 
            -
             | 
| 919 | 
            +
            add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 920 | 
            +
            {
         | 
| 921 | 
            +
              set_cp(data, str_cp);
         | 
| 542 922 | 
             
              return 1;
         | 
| 543 923 | 
             
            }
         | 
| 544 924 |  | 
| 545 925 | 
             
            static VALUE
         | 
| 546 | 
            -
             | 
| 547 | 
            -
             | 
| 548 | 
            -
               | 
| 549 | 
            -
             | 
| 550 | 
            -
               | 
| 926 | 
            +
            cs_method_case_insensitive(VALUE self)
         | 
| 927 | 
            +
            {
         | 
| 928 | 
            +
              cs_cp i, len;
         | 
| 929 | 
            +
              cs_ar *cps;
         | 
| 930 | 
            +
              VALUE new_cs;
         | 
| 931 | 
            +
              struct cs_data *new_data;
         | 
| 551 932 |  | 
| 552 | 
            -
               | 
| 933 | 
            +
              cps = cs_fetch_cps(self, &len);
         | 
| 934 | 
            +
              new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
         | 
| 935 | 
            +
              cs_merge_cs(new_cs, self);
         | 
| 553 936 |  | 
| 554 | 
            -
              for (i = 0; i < CASEFOLD_COUNT; i++) | 
| 937 | 
            +
              for (i = 0; i < CASEFOLD_COUNT; i++)
         | 
| 938 | 
            +
              {
         | 
| 555 939 | 
             
                casefold_mapping m = unicode_casefold_table[i];
         | 
| 556 940 |  | 
| 557 | 
            -
                if | 
| 558 | 
            -
                 | 
| 941 | 
            +
                if (tst_cp(cps, len, m.from))
         | 
| 942 | 
            +
                {
         | 
| 943 | 
            +
                  set_cp(new_data, m.to);
         | 
| 944 | 
            +
                }
         | 
| 945 | 
            +
                else if (tst_cp(cps, len, m.to))
         | 
| 946 | 
            +
                {
         | 
| 947 | 
            +
                  set_cp(new_data, m.from);
         | 
| 948 | 
            +
                }
         | 
| 559 949 | 
             
              }
         | 
| 560 950 |  | 
| 561 | 
            -
              return  | 
| 951 | 
            +
              return new_cs;
         | 
| 562 952 |  | 
| 563 953 | 
             
              // OnigCaseFoldType flags;
         | 
| 564 954 | 
             
              // rb_encoding *enc;
         | 
| @@ -573,20 +963,27 @@ method_case_insensitive(VALUE self) { | |
| 573 963 | 
             
            }
         | 
| 574 964 |  | 
| 575 965 | 
             
            static inline VALUE
         | 
| 576 | 
            -
            each_sb_cp(VALUE str, str_cp_handler func,  | 
| 577 | 
            -
             | 
| 966 | 
            +
            each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 967 | 
            +
            {
         | 
| 968 | 
            +
              long i, str_len;
         | 
| 578 969 | 
             
              unsigned int str_cp;
         | 
| 970 | 
            +
              str_len = RSTRING_LEN(str);
         | 
| 579 971 |  | 
| 580 | 
            -
              for (i = 0; i <  | 
| 972 | 
            +
              for (i = 0; i < str_len; i++)
         | 
| 973 | 
            +
              {
         | 
| 581 974 | 
             
                str_cp = (RSTRING_PTR(str)[i] & 0xff);
         | 
| 582 | 
            -
                if (!(*func)(str_cp, cp_arr)) | 
| 975 | 
            +
                if (!(*func)(str_cp, cp_arr, len, data, memo))
         | 
| 976 | 
            +
                {
         | 
| 977 | 
            +
                  return Qfalse;
         | 
| 978 | 
            +
                }
         | 
| 583 979 | 
             
              }
         | 
| 584 980 |  | 
| 585 981 | 
             
              return Qtrue;
         | 
| 586 982 | 
             
            }
         | 
| 587 983 |  | 
| 588 984 | 
             
            static inline VALUE
         | 
| 589 | 
            -
            each_mb_cp(VALUE str, str_cp_handler func,  | 
| 985 | 
            +
            each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 986 | 
            +
            {
         | 
| 590 987 | 
             
              int n;
         | 
| 591 988 | 
             
              unsigned int str_cp;
         | 
| 592 989 | 
             
              const char *ptr, *end;
         | 
| @@ -597,9 +994,13 @@ each_mb_cp(VALUE str, str_cp_handler func, cp_byte *cp_arr) { | |
| 597 994 | 
             
              end = RSTRING_END(str);
         | 
| 598 995 | 
             
              enc = rb_enc_get(str);
         | 
| 599 996 |  | 
| 600 | 
            -
              while (ptr < end) | 
| 997 | 
            +
              while (ptr < end)
         | 
| 998 | 
            +
              {
         | 
| 601 999 | 
             
                str_cp = rb_enc_codepoint_len(ptr, end, &n, enc);
         | 
| 602 | 
            -
                if (!(*func)(str_cp, cp_arr)) | 
| 1000 | 
            +
                if (!(*func)(str_cp, cp_arr, len, data, memo))
         | 
| 1001 | 
            +
                {
         | 
| 1002 | 
            +
                  return Qfalse;
         | 
| 1003 | 
            +
                }
         | 
| 603 1004 | 
             
                ptr += n;
         | 
| 604 1005 | 
             
              }
         | 
| 605 1006 |  | 
| @@ -611,105 +1012,236 @@ static inline int | |
| 611 1012 | 
             
            single_byte_optimizable(VALUE str)
         | 
| 612 1013 | 
             
            {
         | 
| 613 1014 | 
             
              rb_encoding *enc;
         | 
| 614 | 
            -
              if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) | 
| 1015 | 
            +
              if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
         | 
| 1016 | 
            +
              {
         | 
| 1017 | 
            +
                return 1;
         | 
| 1018 | 
            +
              }
         | 
| 615 1019 |  | 
| 616 1020 | 
             
              enc = rb_enc_get(str);
         | 
| 617 | 
            -
              if (rb_enc_mbmaxlen(enc) == 1) | 
| 1021 | 
            +
              if (rb_enc_mbmaxlen(enc) == 1)
         | 
| 1022 | 
            +
              {
         | 
| 1023 | 
            +
                return 1;
         | 
| 1024 | 
            +
              }
         | 
| 618 1025 |  | 
| 619 1026 | 
             
              return 0;
         | 
| 620 1027 | 
             
            }
         | 
| 621 1028 |  | 
| 622 1029 | 
             
            static inline VALUE
         | 
| 623 | 
            -
            each_cp(VALUE str, str_cp_handler func,  | 
| 624 | 
            -
             | 
| 625 | 
            -
             | 
| 1030 | 
            +
            each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 1031 | 
            +
            {
         | 
| 1032 | 
            +
              if (single_byte_optimizable(str))
         | 
| 1033 | 
            +
              {
         | 
| 1034 | 
            +
                return each_sb_cp(str, func, cp_arr, len, data, memo);
         | 
| 626 1035 | 
             
              }
         | 
| 627 | 
            -
              return each_mb_cp(str, func, cp_arr);
         | 
| 1036 | 
            +
              return each_mb_cp(str, func, cp_arr, len, data, memo);
         | 
| 628 1037 | 
             
            }
         | 
| 629 1038 |  | 
| 630 1039 | 
             
            static inline void
         | 
| 631 | 
            -
            raise_arg_err_unless_string(VALUE val) | 
| 632 | 
            -
             | 
| 1040 | 
            +
            raise_arg_err_unless_string(VALUE val)
         | 
| 1041 | 
            +
            {
         | 
| 1042 | 
            +
              if (!RB_TYPE_P(val, T_STRING))
         | 
| 1043 | 
            +
              {
         | 
| 1044 | 
            +
                rb_raise(rb_eArgError, "pass a String");
         | 
| 1045 | 
            +
              }
         | 
| 633 1046 | 
             
            }
         | 
| 634 1047 |  | 
| 635 1048 | 
             
            static VALUE
         | 
| 636 | 
            -
             | 
| 637 | 
            -
             | 
| 1049 | 
            +
            cs_class_method_of(VALUE self, VALUE str)
         | 
| 1050 | 
            +
            {
         | 
| 1051 | 
            +
              VALUE new_cs;
         | 
| 1052 | 
            +
              struct cs_data *new_data;
         | 
| 1053 | 
            +
              new_cs = cs_alloc(self, &new_data);
         | 
| 638 1054 | 
             
              raise_arg_err_unless_string(str);
         | 
| 639 | 
            -
               | 
| 640 | 
            -
               | 
| 641 | 
            -
              return NEW_CHARACTER_SET(self, cp_arr);
         | 
| 1055 | 
            +
              each_cp(str, add_str_cp_to_arr, 0, 0, new_data, 0);
         | 
| 1056 | 
            +
              return new_cs;
         | 
| 642 1057 | 
             
            }
         | 
| 643 1058 |  | 
| 644 1059 | 
             
            static inline int
         | 
| 645 | 
            -
             | 
| 646 | 
            -
             | 
| 1060 | 
            +
            count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 1061 | 
            +
            {
         | 
| 1062 | 
            +
              if (tst_cp(cp_arr, len, str_cp))
         | 
| 1063 | 
            +
              {
         | 
| 1064 | 
            +
                *memo += 1;
         | 
| 1065 | 
            +
              }
         | 
| 1066 | 
            +
              return 1;
         | 
| 647 1067 | 
             
            }
         | 
| 648 1068 |  | 
| 649 1069 | 
             
            static VALUE
         | 
| 650 | 
            -
             | 
| 651 | 
            -
             | 
| 652 | 
            -
              VALUE  | 
| 1070 | 
            +
            cs_method_count_in(VALUE self, VALUE str)
         | 
| 1071 | 
            +
            {
         | 
| 1072 | 
            +
              VALUE count;
         | 
| 1073 | 
            +
              struct cs_data *data;
         | 
| 653 1074 | 
             
              raise_arg_err_unless_string(str);
         | 
| 654 | 
            -
               | 
| 655 | 
            -
               | 
| 656 | 
            -
               | 
| 1075 | 
            +
              data = cs_fetch_data(self);
         | 
| 1076 | 
            +
              count = 0;
         | 
| 1077 | 
            +
              each_cp(str, count_str_cp, data->cps, data->len, data, &count);
         | 
| 1078 | 
            +
              return INT2NUM((int)count);
         | 
| 1079 | 
            +
            }
         | 
| 1080 | 
            +
             | 
| 1081 | 
            +
            static inline int
         | 
| 1082 | 
            +
            str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 1083 | 
            +
            {
         | 
| 1084 | 
            +
              return tst_cp(cp_arr, len, str_cp);
         | 
| 1085 | 
            +
            }
         | 
| 1086 | 
            +
             | 
| 1087 | 
            +
            static VALUE
         | 
| 1088 | 
            +
            cs_method_cover_p(VALUE self, VALUE str)
         | 
| 1089 | 
            +
            {
         | 
| 1090 | 
            +
              struct cs_data *data;
         | 
| 1091 | 
            +
              raise_arg_err_unless_string(str);
         | 
| 1092 | 
            +
              data = cs_fetch_data(self);
         | 
| 1093 | 
            +
              return each_cp(str, str_cp_in_arr, data->cps, data->len, data, 0);
         | 
| 1094 | 
            +
            }
         | 
| 1095 | 
            +
             | 
| 1096 | 
            +
            static inline int
         | 
| 1097 | 
            +
            add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 1098 | 
            +
            {
         | 
| 1099 | 
            +
              if (tst_cp(cp_arr, len, str_cp))
         | 
| 1100 | 
            +
              {
         | 
| 1101 | 
            +
                rb_ary_push(memo[0], rb_enc_uint_chr((int)str_cp, (rb_encoding *)memo[1]));
         | 
| 1102 | 
            +
              }
         | 
| 1103 | 
            +
              return 1;
         | 
| 1104 | 
            +
            }
         | 
| 1105 | 
            +
             | 
| 1106 | 
            +
            static VALUE
         | 
| 1107 | 
            +
            cs_method_scan(VALUE self, VALUE str)
         | 
| 1108 | 
            +
            {
         | 
| 1109 | 
            +
              VALUE memo[2];
         | 
| 1110 | 
            +
              struct cs_data *data;
         | 
| 1111 | 
            +
              raise_arg_err_unless_string(str);
         | 
| 1112 | 
            +
              data = cs_fetch_data(self);
         | 
| 1113 | 
            +
              memo[0] = rb_ary_new();
         | 
| 1114 | 
            +
              memo[1] = (VALUE)rb_enc_get(str);
         | 
| 1115 | 
            +
              each_cp(str, add_str_cp_to_str_arr, data->cps, data->len, data, memo);
         | 
| 1116 | 
            +
              return memo[0];
         | 
| 657 1117 | 
             
            }
         | 
| 658 1118 |  | 
| 659 1119 | 
             
            static inline int
         | 
| 660 | 
            -
             | 
| 661 | 
            -
             | 
| 1120 | 
            +
            str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
         | 
| 1121 | 
            +
            {
         | 
| 1122 | 
            +
              return !tst_cp(cp_arr, len, str_cp);
         | 
| 662 1123 | 
             
            }
         | 
| 663 1124 |  | 
| 664 1125 | 
             
            static VALUE
         | 
| 665 | 
            -
             | 
| 666 | 
            -
             | 
| 1126 | 
            +
            cs_method_used_by_p(VALUE self, VALUE str)
         | 
| 1127 | 
            +
            {
         | 
| 1128 | 
            +
              VALUE only_uses_other_cps;
         | 
| 1129 | 
            +
              struct cs_data *data;
         | 
| 667 1130 | 
             
              raise_arg_err_unless_string(str);
         | 
| 668 | 
            -
               | 
| 669 | 
            -
               | 
| 1131 | 
            +
              data = cs_fetch_data(self);
         | 
| 1132 | 
            +
              only_uses_other_cps = each_cp(str, str_cp_not_in_arr, data->cps, data->len, data, 0);
         | 
| 1133 | 
            +
              return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
         | 
| 1134 | 
            +
            }
         | 
| 1135 | 
            +
             | 
| 1136 | 
            +
            static void
         | 
| 1137 | 
            +
            cs_str_buf_cat(VALUE str, const char *ptr, long len)
         | 
| 1138 | 
            +
            {
         | 
| 1139 | 
            +
              long total, olen;
         | 
| 1140 | 
            +
              char *sptr;
         | 
| 1141 | 
            +
             | 
| 1142 | 
            +
              RSTRING_GETMEM(str, sptr, olen);
         | 
| 1143 | 
            +
              sptr = RSTRING(str)->as.heap.ptr;
         | 
| 1144 | 
            +
              olen = RSTRING(str)->as.heap.len;
         | 
| 1145 | 
            +
              total = olen + len;
         | 
| 1146 | 
            +
              memcpy(sptr + olen, ptr, len);
         | 
| 1147 | 
            +
              RSTRING(str)->as.heap.len = total;
         | 
| 1148 | 
            +
            }
         | 
| 1149 | 
            +
             | 
| 1150 | 
            +
            #ifndef TERM_FILL
         | 
| 1151 | 
            +
            #define TERM_FILL(ptr, termlen)                     \
         | 
| 1152 | 
            +
              do                                                \
         | 
| 1153 | 
            +
              {                                                 \
         | 
| 1154 | 
            +
                char *const term_fill_ptr = (ptr);              \
         | 
| 1155 | 
            +
                const int term_fill_len = (termlen);            \
         | 
| 1156 | 
            +
                *term_fill_ptr = '\0';                          \
         | 
| 1157 | 
            +
                if (__builtin_expect(!!(term_fill_len > 1), 0)) \
         | 
| 1158 | 
            +
                  memset(term_fill_ptr, 0, term_fill_len);      \
         | 
| 1159 | 
            +
              } while (0)
         | 
| 1160 | 
            +
            #endif
         | 
| 1161 | 
            +
             | 
| 1162 | 
            +
            static void
         | 
| 1163 | 
            +
            cs_str_buf_terminate(VALUE str, rb_encoding *enc)
         | 
| 1164 | 
            +
            {
         | 
| 1165 | 
            +
              char *ptr;
         | 
| 1166 | 
            +
              long len;
         | 
| 1167 | 
            +
             | 
| 1168 | 
            +
              ptr = RSTRING(str)->as.heap.ptr;
         | 
| 1169 | 
            +
              len = RSTRING(str)->as.heap.len;
         | 
| 1170 | 
            +
              TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
         | 
| 670 1171 | 
             
            }
         | 
| 671 1172 |  | 
| 672 1173 | 
             
            static inline VALUE
         | 
| 673 | 
            -
             | 
| 674 | 
            -
             | 
| 1174 | 
            +
            cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
         | 
| 1175 | 
            +
            {
         | 
| 1176 | 
            +
              cs_ar *cps;
         | 
| 1177 | 
            +
              cs_cp len;
         | 
| 675 1178 | 
             
              rb_encoding *str_enc;
         | 
| 676 | 
            -
              VALUE orig_len,  | 
| 677 | 
            -
              int  | 
| 1179 | 
            +
              VALUE orig_len, new_str_buf;
         | 
| 1180 | 
            +
              int cp_len;
         | 
| 678 1181 | 
             
              unsigned int str_cp;
         | 
| 679 1182 | 
             
              const char *ptr, *end;
         | 
| 680 1183 |  | 
| 681 1184 | 
             
              raise_arg_err_unless_string(str);
         | 
| 682 1185 |  | 
| 683 | 
            -
               | 
| 1186 | 
            +
              cps = cs_fetch_cps(set, &len);
         | 
| 684 1187 |  | 
| 685 1188 | 
             
              orig_len = RSTRING_LEN(str);
         | 
| 686 | 
            -
               | 
| 687 | 
            -
               | 
| 1189 | 
            +
              if (orig_len < 1) // empty string, will never change
         | 
| 1190 | 
            +
              {
         | 
| 1191 | 
            +
                if (bang)
         | 
| 1192 | 
            +
                {
         | 
| 1193 | 
            +
                  return Qnil;
         | 
| 1194 | 
            +
                }
         | 
| 1195 | 
            +
                return rb_str_dup(str);
         | 
| 1196 | 
            +
              }
         | 
| 1197 | 
            +
             | 
| 1198 | 
            +
              new_str_buf = rb_str_buf_new(orig_len + 30); // len + margin
         | 
| 688 1199 | 
             
              str_enc = rb_enc_get(str);
         | 
| 689 1200 | 
             
              rb_enc_associate(new_str_buf, str_enc);
         | 
| 690 | 
            -
               | 
| 691 | 
            -
             | 
| 1201 | 
            +
              rb_str_modify(new_str_buf);
         | 
| 1202 | 
            +
              ENC_CODERANGE_SET(new_str_buf, rb_enc_asciicompat(str_enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
         | 
| 692 1203 |  | 
| 693 1204 | 
             
              ptr = RSTRING_PTR(str);
         | 
| 694 1205 | 
             
              end = RSTRING_END(str);
         | 
| 695 1206 |  | 
| 696 | 
            -
               | 
| 697 | 
            -
             | 
| 698 | 
            -
                 | 
| 699 | 
            -
             | 
| 700 | 
            -
                   | 
| 1207 | 
            +
              if (single_byte_optimizable(str))
         | 
| 1208 | 
            +
              {
         | 
| 1209 | 
            +
                while (ptr < end)
         | 
| 1210 | 
            +
                {
         | 
| 1211 | 
            +
                  str_cp = *ptr & 0xff;
         | 
| 1212 | 
            +
                  if ((!tst_cp(cps, len, str_cp)) == delete)
         | 
| 1213 | 
            +
                  {
         | 
| 1214 | 
            +
                    cs_str_buf_cat(new_str_buf, ptr, 1);
         | 
| 1215 | 
            +
                  }
         | 
| 1216 | 
            +
                  ptr++;
         | 
| 1217 | 
            +
                }
         | 
| 1218 | 
            +
              }
         | 
| 1219 | 
            +
              else // likely to be multibyte string
         | 
| 1220 | 
            +
              {
         | 
| 1221 | 
            +
                while (ptr < end)
         | 
| 1222 | 
            +
                {
         | 
| 1223 | 
            +
                  str_cp = rb_enc_codepoint_len(ptr, end, &cp_len, str_enc);
         | 
| 1224 | 
            +
                  if ((!tst_cp(cps, len, str_cp)) == delete)
         | 
| 1225 | 
            +
                  {
         | 
| 1226 | 
            +
                    cs_str_buf_cat(new_str_buf, ptr, cp_len);
         | 
| 1227 | 
            +
                  }
         | 
| 1228 | 
            +
                  ptr += cp_len;
         | 
| 701 1229 | 
             
                }
         | 
| 702 | 
            -
                ptr += n;
         | 
| 703 1230 | 
             
              }
         | 
| 704 1231 |  | 
| 705 | 
            -
               | 
| 706 | 
            -
             | 
| 1232 | 
            +
              cs_str_buf_terminate(new_str_buf, str_enc);
         | 
| 1233 | 
            +
             | 
| 1234 | 
            +
              if (bang)
         | 
| 1235 | 
            +
              {
         | 
| 1236 | 
            +
                if (RSTRING_LEN(new_str_buf) == (long)orig_len) // string unchanged
         | 
| 1237 | 
            +
                {
         | 
| 1238 | 
            +
                  return Qnil;
         | 
| 1239 | 
            +
                }
         | 
| 707 1240 | 
             
                rb_str_shared_replace(str, new_str_buf);
         | 
| 708 1241 | 
             
              }
         | 
| 709 | 
            -
              else | 
| 1242 | 
            +
              else
         | 
| 1243 | 
            +
              {
         | 
| 710 1244 | 
             
                RB_OBJ_WRITE(new_str_buf, &(RBASIC(new_str_buf))->klass, rb_obj_class(str));
         | 
| 711 | 
            -
                // slightly cumbersome approach needed for compatibility with Ruby < 2.3:
         | 
| 712 | 
            -
                RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags&(FL_TAINT));
         | 
| 713 1245 | 
             
                str = new_str_buf;
         | 
| 714 1246 | 
             
              }
         | 
| 715 1247 |  | 
| @@ -717,98 +1249,115 @@ apply_to_str(VALUE set, VALUE str, int delete, int bang) { | |
| 717 1249 | 
             
            }
         | 
| 718 1250 |  | 
| 719 1251 | 
             
            static VALUE
         | 
| 720 | 
            -
             | 
| 721 | 
            -
             | 
| 1252 | 
            +
            cs_method_delete_in(VALUE self, VALUE str)
         | 
| 1253 | 
            +
            {
         | 
| 1254 | 
            +
              return cs_apply_to_str(self, str, 1, 0);
         | 
| 1255 | 
            +
            }
         | 
| 1256 | 
            +
             | 
| 1257 | 
            +
            static VALUE
         | 
| 1258 | 
            +
            cs_method_delete_in_bang(VALUE self, VALUE str)
         | 
| 1259 | 
            +
            {
         | 
| 1260 | 
            +
              return cs_apply_to_str(self, str, 1, 1);
         | 
| 722 1261 | 
             
            }
         | 
| 723 1262 |  | 
| 724 1263 | 
             
            static VALUE
         | 
| 725 | 
            -
             | 
| 726 | 
            -
             | 
| 1264 | 
            +
            cs_method_keep_in(VALUE self, VALUE str)
         | 
| 1265 | 
            +
            {
         | 
| 1266 | 
            +
              return cs_apply_to_str(self, str, 0, 0);
         | 
| 727 1267 | 
             
            }
         | 
| 728 1268 |  | 
| 729 1269 | 
             
            static VALUE
         | 
| 730 | 
            -
             | 
| 731 | 
            -
             | 
| 1270 | 
            +
            cs_method_keep_in_bang(VALUE self, VALUE str)
         | 
| 1271 | 
            +
            {
         | 
| 1272 | 
            +
              return cs_apply_to_str(self, str, 0, 1);
         | 
| 732 1273 | 
             
            }
         | 
| 733 1274 |  | 
| 734 1275 | 
             
            static VALUE
         | 
| 735 | 
            -
             | 
| 736 | 
            -
             | 
| 1276 | 
            +
            cs_method_allocated_length(VALUE self)
         | 
| 1277 | 
            +
            {
         | 
| 1278 | 
            +
              return LONG2FIX(cs_fetch_data(self)->len);
         | 
| 737 1279 | 
             
            }
         | 
| 738 1280 |  | 
| 739 1281 | 
             
            // ****
         | 
| 740 1282 | 
             
            // init
         | 
| 741 1283 | 
             
            // ****
         | 
| 742 1284 |  | 
| 743 | 
            -
            void
         | 
| 744 | 
            -
            Init_character_set()
         | 
| 1285 | 
            +
            void Init_character_set()
         | 
| 745 1286 | 
             
            {
         | 
| 746 1287 | 
             
              VALUE cs = rb_define_class("CharacterSet", rb_cObject);
         | 
| 747 1288 |  | 
| 748 | 
            -
              rb_define_alloc_func(cs,  | 
| 1289 | 
            +
              rb_define_alloc_func(cs, cs_method_allocate);
         | 
| 749 1290 |  | 
| 750 1291 | 
             
              // `Set` compatibility methods
         | 
| 751 1292 |  | 
| 752 | 
            -
              rb_define_method(cs, "each", | 
| 753 | 
            -
              rb_define_method(cs, "to_a", | 
| 754 | 
            -
              rb_define_method(cs, "length", | 
| 755 | 
            -
              rb_define_method(cs, "size", | 
| 756 | 
            -
              rb_define_method(cs, " | 
| 757 | 
            -
              rb_define_method(cs, " | 
| 758 | 
            -
              rb_define_method(cs, " | 
| 759 | 
            -
              rb_define_method(cs, " | 
| 760 | 
            -
              rb_define_method(cs, " | 
| 761 | 
            -
              rb_define_method(cs, " | 
| 762 | 
            -
              rb_define_method(cs, " | 
| 763 | 
            -
              rb_define_method(cs, " | 
| 764 | 
            -
              rb_define_method(cs, " | 
| 765 | 
            -
              rb_define_method(cs, " | 
| 766 | 
            -
              rb_define_method(cs, " | 
| 767 | 
            -
              rb_define_method(cs, " | 
| 768 | 
            -
              rb_define_method(cs, " | 
| 769 | 
            -
              rb_define_method(cs, " | 
| 770 | 
            -
              rb_define_method(cs, " | 
| 771 | 
            -
              rb_define_method(cs, " | 
| 772 | 
            -
              rb_define_method(cs, " | 
| 773 | 
            -
              rb_define_method(cs, " | 
| 774 | 
            -
              rb_define_method(cs, " | 
| 775 | 
            -
              rb_define_method(cs, "add | 
| 776 | 
            -
              rb_define_method(cs, " | 
| 777 | 
            -
              rb_define_method(cs, " | 
| 778 | 
            -
              rb_define_method(cs, " | 
| 779 | 
            -
              rb_define_method(cs, " | 
| 780 | 
            -
              rb_define_method(cs, " | 
| 781 | 
            -
              rb_define_method(cs, " | 
| 782 | 
            -
              rb_define_method(cs, " | 
| 783 | 
            -
              rb_define_method(cs, " | 
| 784 | 
            -
              rb_define_method(cs, " | 
| 785 | 
            -
              rb_define_method(cs, " | 
| 786 | 
            -
              rb_define_method(cs, " | 
| 787 | 
            -
              rb_define_method(cs, " | 
| 788 | 
            -
              rb_define_method(cs, " | 
| 789 | 
            -
              rb_define_method(cs, " | 
| 790 | 
            -
              rb_define_method(cs, " | 
| 791 | 
            -
              rb_define_method(cs, " | 
| 792 | 
            -
              rb_define_method(cs, " | 
| 793 | 
            -
              rb_define_method(cs, " | 
| 1293 | 
            +
              rb_define_method(cs, "each", cs_method_each, 0);
         | 
| 1294 | 
            +
              rb_define_method(cs, "to_a", cs_method_to_a, -1);
         | 
| 1295 | 
            +
              rb_define_method(cs, "length", cs_method_length, 0);
         | 
| 1296 | 
            +
              rb_define_method(cs, "size", cs_method_length, 0);
         | 
| 1297 | 
            +
              rb_define_method(cs, "empty?", cs_method_empty_p, 0);
         | 
| 1298 | 
            +
              rb_define_method(cs, "hash", cs_method_hash, 0);
         | 
| 1299 | 
            +
              rb_define_method(cs, "keep_if", cs_method_keep_if, 0);
         | 
| 1300 | 
            +
              rb_define_method(cs, "delete_if", cs_method_delete_if, 0);
         | 
| 1301 | 
            +
              rb_define_method(cs, "clear", cs_method_clear, 0);
         | 
| 1302 | 
            +
              rb_define_method(cs, "min", cs_method_min, 0);
         | 
| 1303 | 
            +
              rb_define_method(cs, "max", cs_method_max, 0);
         | 
| 1304 | 
            +
              rb_define_method(cs, "minmax", cs_method_minmax, 0);
         | 
| 1305 | 
            +
              rb_define_method(cs, "intersection", cs_method_intersection, 1);
         | 
| 1306 | 
            +
              rb_define_method(cs, "&", cs_method_intersection, 1);
         | 
| 1307 | 
            +
              rb_define_method(cs, "union", cs_method_union, 1);
         | 
| 1308 | 
            +
              rb_define_method(cs, "+", cs_method_union, 1);
         | 
| 1309 | 
            +
              rb_define_method(cs, "|", cs_method_union, 1);
         | 
| 1310 | 
            +
              rb_define_method(cs, "difference", cs_method_difference, 1);
         | 
| 1311 | 
            +
              rb_define_method(cs, "-", cs_method_difference, 1);
         | 
| 1312 | 
            +
              rb_define_method(cs, "^", cs_method_exclusion, 1);
         | 
| 1313 | 
            +
              rb_define_method(cs, "include?", cs_method_include_p, 1);
         | 
| 1314 | 
            +
              rb_define_method(cs, "member?", cs_method_include_p, 1);
         | 
| 1315 | 
            +
              rb_define_method(cs, "===", cs_method_include_p, 1);
         | 
| 1316 | 
            +
              rb_define_method(cs, "add", cs_method_add, 1);
         | 
| 1317 | 
            +
              rb_define_method(cs, "<<", cs_method_add, 1);
         | 
| 1318 | 
            +
              rb_define_method(cs, "add?", cs_method_add_p, 1);
         | 
| 1319 | 
            +
              rb_define_method(cs, "delete", cs_method_delete, 1);
         | 
| 1320 | 
            +
              rb_define_method(cs, "delete?", cs_method_delete_p, 1);
         | 
| 1321 | 
            +
              rb_define_method(cs, "intersect?", cs_method_intersect_p, 1);
         | 
| 1322 | 
            +
              rb_define_method(cs, "disjoint?", cs_method_disjoint_p, 1);
         | 
| 1323 | 
            +
              rb_define_method(cs, "eql?", cs_method_eql_p, 1);
         | 
| 1324 | 
            +
              rb_define_method(cs, "==", cs_method_eql_p, 1);
         | 
| 1325 | 
            +
              rb_define_method(cs, "merge", cs_method_merge, 1);
         | 
| 1326 | 
            +
              rb_define_method(cs, "initialize_clone", cs_method_initialize_copy, 1);
         | 
| 1327 | 
            +
              rb_define_method(cs, "initialize_dup", cs_method_initialize_copy, 1);
         | 
| 1328 | 
            +
              rb_define_method(cs, "subtract", cs_method_subtract, 1);
         | 
| 1329 | 
            +
              rb_define_method(cs, "subset?", cs_method_subset_p, 1);
         | 
| 1330 | 
            +
              rb_define_method(cs, "<=", cs_method_subset_p, 1);
         | 
| 1331 | 
            +
              rb_define_method(cs, "proper_subset?", cs_method_proper_subset_p, 1);
         | 
| 1332 | 
            +
              rb_define_method(cs, "<", cs_method_proper_subset_p, 1);
         | 
| 1333 | 
            +
              rb_define_method(cs, "superset?", cs_method_superset_p, 1);
         | 
| 1334 | 
            +
              rb_define_method(cs, ">=", cs_method_superset_p, 1);
         | 
| 1335 | 
            +
              rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
         | 
| 1336 | 
            +
              rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
         | 
| 794 1337 |  | 
| 795 1338 | 
             
              // `CharacterSet`-specific methods
         | 
| 796 1339 |  | 
| 797 | 
            -
              rb_define_singleton_method(cs, "from_ranges",  | 
| 798 | 
            -
              rb_define_singleton_method(cs, "of", | 
| 799 | 
            -
             | 
| 800 | 
            -
              rb_define_method(cs, "ranges", | 
| 801 | 
            -
              rb_define_method(cs, "sample", | 
| 802 | 
            -
              rb_define_method(cs, " | 
| 803 | 
            -
              rb_define_method(cs, " | 
| 804 | 
            -
              rb_define_method(cs, " | 
| 805 | 
            -
              rb_define_method(cs, " | 
| 806 | 
            -
              rb_define_method(cs, " | 
| 807 | 
            -
              rb_define_method(cs, " | 
| 808 | 
            -
              rb_define_method(cs, " | 
| 809 | 
            -
              rb_define_method(cs, " | 
| 810 | 
            -
              rb_define_method(cs, " | 
| 811 | 
            -
              rb_define_method(cs, " | 
| 812 | 
            -
              rb_define_method(cs, " | 
| 813 | 
            -
              rb_define_method(cs, " | 
| 1340 | 
            +
              rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
         | 
| 1341 | 
            +
              rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
         | 
| 1342 | 
            +
             | 
| 1343 | 
            +
              rb_define_method(cs, "ranges", cs_method_ranges, 0);
         | 
| 1344 | 
            +
              rb_define_method(cs, "sample", cs_method_sample, -1);
         | 
| 1345 | 
            +
              rb_define_method(cs, "ext_section", cs_method_ext_section, 2);
         | 
| 1346 | 
            +
              rb_define_method(cs, "ext_count_in_section", cs_method_ext_count_in_section, 2);
         | 
| 1347 | 
            +
              rb_define_method(cs, "ext_section?", cs_method_ext_section_p, 2);
         | 
| 1348 | 
            +
              rb_define_method(cs, "ext_section_ratio", cs_method_ext_section_ratio, 2);
         | 
| 1349 | 
            +
              rb_define_method(cs, "planes", cs_method_planes, 0);
         | 
| 1350 | 
            +
              rb_define_method(cs, "plane", cs_method_plane, 1);
         | 
| 1351 | 
            +
              rb_define_method(cs, "member_in_plane?", cs_method_member_in_plane_p, 1);
         | 
| 1352 | 
            +
              rb_define_method(cs, "ext_inversion", cs_method_ext_inversion, -1);
         | 
| 1353 | 
            +
              rb_define_method(cs, "case_insensitive", cs_method_case_insensitive, 0);
         | 
| 1354 | 
            +
              rb_define_method(cs, "count_in", cs_method_count_in, 1);
         | 
| 1355 | 
            +
              rb_define_method(cs, "cover?", cs_method_cover_p, 1);
         | 
| 1356 | 
            +
              rb_define_method(cs, "delete_in", cs_method_delete_in, 1);
         | 
| 1357 | 
            +
              rb_define_method(cs, "delete_in!", cs_method_delete_in_bang, 1);
         | 
| 1358 | 
            +
              rb_define_method(cs, "keep_in", cs_method_keep_in, 1);
         | 
| 1359 | 
            +
              rb_define_method(cs, "keep_in!", cs_method_keep_in_bang, 1);
         | 
| 1360 | 
            +
              rb_define_method(cs, "scan", cs_method_scan, 1);
         | 
| 1361 | 
            +
              rb_define_method(cs, "used_by?", cs_method_used_by_p, 1);
         | 
| 1362 | 
            +
              rb_define_method(cs, "allocated_length", cs_method_allocated_length, 0);
         | 
| 814 1363 | 
             
            }
         |