RubyGems - character_set - Versions diffs - 1.4.1 → 1.6.0 - Mend

character_set 1.4.1 → 1.6.0

This diff shows the changes between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/.gitattributes +1 -1
data/.github/workflows/gouteur.yml +20 -0
data/.github/workflows/tests.yml +6 -2
data/.gitignore +1 -0
data/.gouteur.yml +2 -0
data/.rubocop.yml +7 -1
data/BENCHMARK.md +35 -31
data/CHANGELOG.md +32 -0
data/Gemfile +14 -0
data/README.md +22 -6
data/Rakefile +5 -2
data/benchmarks/delete_in.rb +5 -1
data/benchmarks/keep_in.rb +5 -1
data/character_set.gemspec +0 -13
data/ext/character_set/character_set.c +59 -90
data/ext/character_set/unicode_casefold_table.h +44 -1
data/lib/character_set/core_ext/string_ext.rb +1 -1
data/lib/character_set/expression_converter.rb +23 -23
data/lib/character_set/predefined_sets/assigned.cps +51 -40
data/lib/character_set/predefined_sets/emoji.cps +12 -11
data/lib/character_set/predefined_sets.rb +11 -0
data/lib/character_set/ruby_fallback/character_set_methods.rb +3 -3
data/lib/character_set/set_method_adapters.rb +4 -3
data/lib/character_set/shared_methods.rb +15 -1
data/lib/character_set/version.rb +1 -1
metadata +5 -143

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7a91fd10258c312d27d3fa84f99f1a97168d12ca08a3911fe31485565a999246
-  data.tar.gz: 2f16c02b72302259bccda6f2bf731950bd6dc8c679af8812c414ac313f1d8fc2
+  metadata.gz: e216e6c199ac9443cda9180a9e35d5ed92b50b45c12e7f64f45d74ecd2cf08d6
+  data.tar.gz: 5f3634d426dc33875d6c197ce75466544d97808b1e8b1858ac56d93422b226e8
 SHA512:
-  metadata.gz: cab6e94ec0a7efc2f26eba33dd1b4d5af639905d23422ec61420411325832a998c07359a4bf50c24379ec4550784ebc6da0effec4c917e7859392345ce9b8db0
-  data.tar.gz: a2dc319a9f8085e85624f25cc6f12dc03992b50f3f1a8d2000e1b69dadfdc4219c887452bdffbb213a91e1cad2011f237f604aa6fdb7e93243304d22fb5adfa3
+  metadata.gz: d24cfaa40b6e4e472e1f76cc8b6f7f3f1282e6830c0cbf76c4810c0f6f365c7419a19816d0b741cee99eb428dae03fc1d60eecab7d1ba6d210015f0cf2d5ee14
+  data.tar.gz: 2bd7ea63b286e106358293b1428a687374d0cd2cdc985b2da5b5cf1f45c6c541cb0ddde5b06477243cf4011065cfac7fa6bb8a521fb144a750c90039d268f03b

data/.gitattributes CHANGED Viewed

@@ -1,3 +1,3 @@
 *.cps linguist-detectable=false
 benchmarks/* linguist-detectable=false
-spec/ruby-spec/* linguist-vendored
+spec/* linguist-detectable=false

data/.github/workflows/gouteur.yml ADDED Viewed

@@ -0,0 +1,20 @@
+name: gouteur
+on: [push, pull_request]
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: 2.7
+      - name: Prepare
+        run: |
+          bundle install --jobs 4
+          bundle exec rake compile
+      - name: Test
+        run: bundle exec gouteur

data/.github/workflows/tests.yml CHANGED Viewed

@@ -1,6 +1,10 @@
 name: tests
-on: [push, pull_request]
+on:
+  push:
+  pull_request:
+  schedule:
+    - cron: '11 11 14 * *' # at 11:11 am on the 14th of every month
 jobs:
   build:
@@ -8,7 +12,7 @@ jobs:
     strategy:
       matrix:
-        ruby: [ '2.2', '2.7', '3.0', 'ruby-head', 'jruby-head' ]
+        ruby: [ '2.2', '2.7', '3.0', '3.1', 'ruby-head', 'jruby-head' ]
     steps:
       - uses: actions/checkout@v2

data/.gitignore CHANGED Viewed

@@ -15,6 +15,7 @@
 .ruby-version
 .tags
 .tags1
+.tool-versions
 .vscode
 bbin/
 binstubs/*

data/.gouteur.yml ADDED Viewed

@@ -0,0 +1,2 @@
+repos:
+- uri: https://github.com/jaynetics/js_regex

data/.rubocop.yml CHANGED Viewed

@@ -8,4 +8,10 @@ AllCops:
   RubyInterpreters:
     - ruby
     - rake
-  TargetRubyVersion: 2.4 # really 2.1, but 2.4 is lowest supported by rubocop
+  TargetRubyVersion: 2.5 # really 2.1, but 2.5 is lowest supported by rubocop
+Lint/AmbiguousOperatorPrecedence:
+  Enabled: false
+Lint/AmbiguousRegexpLiteral:
+  Enabled: false

data/BENCHMARK.md CHANGED Viewed

@@ -1,86 +1,90 @@
-Results of `rake:benchmark` on ruby 3.0.0p0 (2020-12-25 revision 95aff21468) [x86_64-darwin19]
+Results of `rake:benchmark` on ruby 3.2.0dev (2022-02-14T14:35:54Z master 26187a8520) [arm64-darwin21]
 ```
 Counting non-letters
-CharacterSet#count_in:  9472902.2 i/s
-        String#count:  2221799.9 i/s - 4.26x slower
+CharacterSet#count_in: 14794607.9 i/s
+        String#count:  3875939.3 i/s - 3.82x slower
 ```
 ```
 Detecting non-whitespace
- CharacterSet#cover?: 12388427.2 i/s
-       Regexp#match?:  7901676.8 i/s - 1.57x slower
+ CharacterSet#cover?: 17448329.0 i/s
+       Regexp#match?: 13089358.1 i/s - 1.33x slower
 ```
 ```
 Detecting non-letters
- CharacterSet#cover?: 12263689.1 i/s
-       Regexp#match?:  4940889.9 i/s - 2.48x slower
+ CharacterSet#cover?: 17565596.9 i/s
+       Regexp#match?:  7951108.0 i/s - 2.21x slower
 ```
 ```
-Removing whitespace
+Removing ASCII whitespace
-CharacterSet#delete_in:  2406722.6 i/s
-         String#gsub:   235760.3 i/s - 10.21x slower
+CharacterSet#delete_in:  6306078.2 i/s
+           String#tr:  4734401.0 i/s - 1.33x slower
+         String#gsub:   211631.8 i/s - 29.80x slower
 ```
 ```
 Removing whitespace, emoji and umlauts
-CharacterSet#delete_in:  1653607.6 i/s
-         String#gsub:   272782.9 i/s - 6.06x slower
+CharacterSet#delete_in:  5984149.6 i/s
+           String#tr:   363643.1 i/s - 16.46x slower
+         String#gsub:   317201.7 i/s - 18.87x slower
 ```
 ```
 Removing non-whitespace
-CharacterSet#keep_in:  2671038.2 i/s
-         String#gsub:   242551.0 i/s - 11.01x slower
+CharacterSet#keep_in:  7650925.6 i/s
+         String#gsub:   207374.6 i/s - 36.89x slower
+           String#tr:       12.3 i/s - 619745.60x slower
 ```
 ```
-Extracting emoji
+Keeping only emoji
-CharacterSet#keep_in:  1726496.5 i/s
-         String#gsub:   215609.2 i/s - 8.01x slower
+CharacterSet#keep_in:  7272940.1 i/s
+         String#gsub:   177993.8 i/s - 40.86x slower
+           String#tr:       12.3 i/s - 590222.71x slower
 ```
 ```
 Extracting emoji to an Array
-   CharacterSet#scan:  2373856.1 i/s
-         String#scan:   480000.5 i/s - 4.95x slower
+   CharacterSet#scan:  2978285.0 i/s
+         String#scan:   865793.8 i/s - 3.44x slower
 ```
 ```
 Detecting whitespace
-CharacterSet#used_by?: 11988328.7 i/s
-       Regexp#match?:  6758146.8 i/s - 1.77x slower
+CharacterSet#used_by?: 17292338.4 i/s
+       Regexp#match?: 11705563.9 i/s - 1.48x slower
 ```
 ```
 Detecting emoji in a large string
-CharacterSet#used_by?:   288223.3 i/s
-       Regexp#match?:   102384.2 i/s - 2.82x slower
+CharacterSet#used_by?:   340444.1 i/s
+       Regexp#match?:   180549.8 i/s - 1.89x slower
 ```
 ```
 Adding entries
-    CharacterSet#add:  2538251.2 i/s
-       SortedSet#add:   443925.9 i/s - 5.72x slower
+    CharacterSet#add:  4951781.4 i/s
+       SortedSet#add:  1019637.9 i/s - 4.86x slower
 ```
 ```
 Removing entries
- CharacterSet#delete:  2487620.8 i/s
-    SortedSet#delete:   628816.1 i/s - 3.96x slower
+ CharacterSet#delete:  5006337.6 i/s
+    SortedSet#delete:  3922752.2 i/s - same-ish
 ```
 ```
 Merging entries
-  CharacterSet#merge:      551.6 i/s
-     SortedSet#merge:        1.4 i/s - 393.59x slower
+  CharacterSet#merge:      661.8 i/s
+     SortedSet#merge:        3.9 i/s - 167.82x slower
 ```
 ```
 Getting the min and max
- CharacterSet#minmax:   636890.7 i/s
-    SortedSet#minmax:      254.1 i/s - 2506.20x slower
+ CharacterSet#minmax:  1212462.2 i/s
+    SortedSet#minmax:      844.4 i/s - 1435.93x slower
 ```

data/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,38 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
+## [1.6.0] - 2022-02-16
+### Added
+- `::of` now supports both `String` and `Regexp` arguments
+### Fixed
+- fixed segfault during `String` manipulation on Ruby 3.2.0-dev
+- improved performance for `String` manipulation
+- allow usage in Ractors
+  - predefined sets must be pre-initialized for this, though
+  - e.g. `CharacterSet.ascii`, `keep_character_set(:ascii)` etc.
+  - call them once in the main Ractor to trigger initialization
+## [1.5.0] - 2021-12-05
+### Added
+- new codepoints for `::assigned` and `::emoji` predefined sets, as in Ruby 3.1.0
+- latest unicode case-folding data (for `#case_insensitive`)
+- support for passing any Enumerable to `#disjoint?`, `#intersect?`
+  - this matches recent broadening of these methods in `ruby/set`
+- new instance method `#secure_token` (see README)
+- class method `::of` now accepts more than one `String`
+- `CharacterSet::ExpressionConverter` can now build output of any Set-like class
+### Fixed
+- `CharacterSet::Pure::of_expression` now returns a `CharacterSet::Pure`
+  - it used to return a regular `CharacterSet`
 ## [1.4.1] - 2020-01-10
 ### Fixed

data/Gemfile CHANGED Viewed

@@ -4,3 +4,17 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
 # Specify your gem's dependencies in character_set.gemspec
 gemspec
+gem 'benchmark-ips', '~> 2.7'
+gem 'get_process_mem', '~> 0.2.3'
+gem 'rake', '~> 13.0'
+gem 'rake-compiler', '~> 1.1'
+gem 'range_compressor', '~> 1.0'
+gem 'regexp_parser', '~> 2.1'
+gem 'regexp_property_values', '~> 1.0'
+gem 'rspec', '~> 3.8'
+if RUBY_VERSION.to_f >= 2.7
+  gem 'codecov', '~> 0.2.12'
+  gem 'gouteur', '~> 1.0.0'
+  gem 'rubocop', '~> 1.8'
+end

data/README.md CHANGED Viewed

@@ -2,17 +2,20 @@
 [![Gem Version](https://badge.fury.io/rb/character_set.svg)](http://badge.fury.io/rb/character_set)
 [![Build Status](https://github.com/jaynetics/character_set/workflows/tests/badge.svg)](https://github.com/jaynetics/character_set/actions)
+[![Build Status](https://github.com/jaynetics/character_set/workflows/gouteur/badge.svg)](https://github.com/jaynetics/character_set/actions)
 [![codecov](https://codecov.io/gh/jaynetics/character_set/branch/master/graph/badge.svg)](https://codecov.io/gh/jaynetics/character_set)
-This is a C-extended Ruby gem to work with sets of Unicode codepoints. It can read and write these sets in various formats and implements the stdlib `Set` interface for them.
+This is a C-extended Ruby gem to work with sets of Unicode codepoints.
-It also offers an alternate paradigm of `String` processing which grants much better performance than `Regexp` and `String` methods from the stdlib where applicable (see [benchmarks](./BENCHMARK.md)).
+It can [read](#parseinitialize) and [write](#write) sets of codepoints in various formats and it implements the stdlib `Set` interface for them.
+It also offers a [way of scrubbing and scanning characters in Strings](#interact-with-strings) that is more semantic and consistently offers better performance than `Regexp` and `String` methods from the stdlib for this (see [benchmarks](./BENCHMARK.md)).
 Many parts can be used independently, e.g.:
 - `CharacterSet::Character`
+- `CharacterSet::ExpressionConverter`
 - `CharacterSet::Parser`
 - `CharacterSet::Writer`
-- [`RangeCompressor`](https://github.com/jaynetics/range_compressor)
 ## Usage
@@ -40,9 +43,10 @@ CharacterSet.parse('[a-c]')
 CharacterSet.parse('\U00000061-\U00000063')
 ```
-If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `::of_regexp` and `::of_property` can also be used. `::of_regexp` can handle intersections, negations, and set nesting. Regexp's `i`-flag is ignored; call `#case_insensitive` on the result if needed.
+If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `Regexp` and unicode property names can also be read. Regexp intersections, negations, and set nesting are covered, but the `i`-flag is ignored; call `#case_insensitive` on the result if needed.
 ```ruby
+CharacterSet.of(/./) # => #<CharacterSet (size: 1112064)>
 CharacterSet.of_property('Thai') # => #<CharacterSet (size: 86)>
 require 'character_set/core_ext/regexp_ext'
@@ -143,6 +147,7 @@ CharacterSet['1', 'A'].case_insensitive # => CharacterSet['1', 'A', 'a']
 ```
 ### Write
 ```ruby
 set = CharacterSet['a', 'b', 'c', 'j', '-']
@@ -181,7 +186,18 @@ set.to_s_with_surrogate_alternation
 # => '(?:[ab]|\uD83E\uDD29|\uD83E\uDD2A|\uD83E\uDD2B)'
 ```
-### Unicode plane methods
+### Other features
+#### Secure tokens
+Generate secure random strings of characters from a set:
+```ruby
+CharacterSet.new('a'..'z').secure_token(8) # => "ugwpujmt"
+CharacterSet.crypt.secure_token # => "8.1w7aBT737/pMfcMoO4y2y8/=0xtmo:"
+```
+#### Unicode planes
 There are some methods to check for planes and to handle ASCII, [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
 ```Ruby
@@ -198,6 +214,6 @@ CharacterSet['a', 'ü', '🤩'].member_in_plane?(7) # => false
 CharacterSet::Character.new('a').plane # => 0
 ```
-### Contributions
+## Contributions
 Feel free to send suggestions, point out issues, or submit pull requests.

data/Rakefile CHANGED Viewed

@@ -147,8 +147,11 @@ namespace :benchmark do
       f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}", ''
       $store_comparison_results.each do |caption, result|
-        f.puts '```', caption, '',
-               result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
+        f.puts '```',
+               caption,
+               '',
+               result.strip.gsub(/ \(±[^)]+\) /, '').gsub(/(same-ish).*$/, '\1').lines[1..-1],
+               '```'
       end
     end
   end

data/benchmarks/delete_in.rb CHANGED Viewed

@@ -2,24 +2,28 @@ require_relative './shared'
 str = 'Lorem     ipsum       et      dolorem'
 rx = /\s/
+trt = "\t\n\v\f\r\s"
 cs = CharacterSet.whitespace
 benchmark(
-  caption: 'Removing whitespace',
+  caption: 'Removing ASCII whitespace',
   cases: {
     'String#gsub'            => -> { str.gsub(rx, '') },
+    'String#tr'              => -> { str.tr(trt, '') },
     'CharacterSet#delete_in' => -> { cs.delete_in(str) },
   }
 )
 str = 'Lörem ipsüm ⛷ et dölörem'
 rx = /[\s\p{emoji}äüö]/
+trt = "\t\n\v\f\r\s😀-🙏äüö"
 cs = CharacterSet.whitespace + CharacterSet.emoji + CharacterSet['ä', 'ö', 'ü']
 benchmark(
   caption: 'Removing whitespace, emoji and umlauts',
   cases: {
     'String#gsub'            => -> { str.gsub(rx, '') },
+    'String#tr'              => -> { str.tr(trt, '') },
     'CharacterSet#delete_in' => -> { cs.delete_in(str) },
   }
 )

data/benchmarks/keep_in.rb CHANGED Viewed

@@ -2,24 +2,28 @@ require_relative './shared'
 str = 'Lorem ipsum et dolorem'
 rx = /\S/
+trt = "\u{0080}-\u{10FFFF}" # approximation
 cs = CharacterSet.whitespace
 benchmark(
   caption: 'Removing non-whitespace',
   cases: {
     'String#gsub'          => -> { str.gsub(rx, '') },
+    'String#tr'            => -> { str.tr(trt, '') },
     'CharacterSet#keep_in' => -> { cs.keep_in(str) },
   }
 )
 str = 'Lorem ipsum ⛷ et dolorem'
 rx = /\p{^emoji}/
+trt = "\u0000-\u{1F599}\u{1F650}-\u{10FFFF}"
 cs = CharacterSet.emoji
 benchmark(
-  caption: 'Extracting emoji',
+  caption: 'Keeping only emoji',
   cases: {
     'String#gsub'          => -> { str.gsub(rx, '') },
+    'String#tr'            => -> { str.tr(trt, '') },
     'CharacterSet#keep_in' => -> { cs.keep_in(str) },
   }
 )

data/character_set.gemspec CHANGED Viewed

@@ -28,17 +28,4 @@ Gem::Specification.new do |s|
   if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
     s.add_dependency 'sorted_set', '~> 1.0'
   end
-  s.add_development_dependency 'benchmark-ips', '~> 2.7'
-  s.add_development_dependency 'get_process_mem', '~> 0.2.3'
-  s.add_development_dependency 'rake', '~> 13.0'
-  s.add_development_dependency 'rake-compiler', '~> 1.1'
-  s.add_development_dependency 'range_compressor', '~> 1.0'
-  s.add_development_dependency 'regexp_parser', '~> 1.6'
-  s.add_development_dependency 'regexp_property_values', '~> 1.0'
-  s.add_development_dependency 'rspec', '~> 3.8'
-  if RUBY_VERSION.to_f >= 2.7
-    s.add_development_dependency 'codecov', '~> 0.2.12'
-    s.add_development_dependency 'rubocop', '~> 1.8'
-  end
 end

data/ext/character_set/character_set.c CHANGED Viewed

@@ -82,7 +82,11 @@ static const rb_data_type_t cs_type = {
         .dsize = cs_memsize,
     },
     .data = NULL,
+#ifdef RUBY_TYPED_FROZEN_SHAREABLE
+    .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
+#else
     .flags = RUBY_TYPED_FREE_IMMEDIATELY,
+#endif
 };
 static inline VALUE
@@ -315,9 +319,9 @@ cs_method_minmax(VALUE self)
     cs_cp cp, alen, blen;                                        \
     cs_ar *acps, *bcps;                                          \
     struct cs_data *new_data;                                    \
-    new_cs = cs_alloc(RBASIC(self)->klass, &new_data);           \
     acps = cs_fetch_cps(cs_a, &alen);                            \
     bcps = cs_fetch_cps(cs_b, &blen);                            \
+    new_cs = cs_alloc(RBASIC(self)->klass, &new_data);           \
     for (cp = 0; cp < UNICODE_CP_COUNT; cp++)                    \
     {                                                            \
       if (tst_cp(acps, alen, cp) comp_op tst_cp(bcps, blen, cp)) \
@@ -1046,13 +1050,14 @@ raise_arg_err_unless_string(VALUE val)
 }
 static VALUE
-cs_class_method_of(VALUE self, VALUE str)
+cs_class_method_of_string(VALUE self, VALUE string)
 {
   VALUE new_cs;
   struct cs_data *new_data;
+  raise_arg_err_unless_string(string);
   new_cs = cs_alloc(self, &new_data);
-  raise_arg_err_unless_string(str);
-  each_cp(str, add_str_cp_to_arr, 0, 0, new_data, 0);
+  each_cp(string, add_str_cp_to_arr, 0, 0, new_data, 0);
   return new_cs;
 }
@@ -1133,116 +1138,76 @@ cs_method_used_by_p(VALUE self, VALUE str)
   return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
 }
-static void
-cs_str_buf_cat(VALUE str, const char *ptr, long len)
-{
-  long total, olen;
-  char *sptr;
-  RSTRING_GETMEM(str, sptr, olen);
-  sptr = RSTRING(str)->as.heap.ptr;
-  olen = RSTRING(str)->as.heap.len;
-  total = olen + len;
-  memcpy(sptr + olen, ptr, len);
-  RSTRING(str)->as.heap.len = total;
-}
-#ifndef TERM_FILL
-#define TERM_FILL(ptr, termlen)                     \
-  do                                                \
-  {                                                 \
-    char *const term_fill_ptr = (ptr);              \
-    const int term_fill_len = (termlen);            \
-    *term_fill_ptr = '\0';                          \
-    if (__builtin_expect(!!(term_fill_len > 1), 0)) \
-      memset(term_fill_ptr, 0, term_fill_len);      \
-  } while (0)
-#endif
-static void
-cs_str_buf_terminate(VALUE str, rb_encoding *enc)
-{
-  char *ptr;
-  long len;
-  ptr = RSTRING(str)->as.heap.ptr;
-  len = RSTRING(str)->as.heap.len;
-  TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
-}
+// partially based on rb_str_delete_bang
 static inline VALUE
 cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
 {
   cs_ar *cps;
-  cs_cp len;
-  rb_encoding *str_enc;
-  VALUE orig_len, new_str_buf;
-  int cp_len;
-  unsigned int str_cp;
-  const char *ptr, *end;
+  cs_cp cs_len;
+  VALUE orig_str_len;
+  rb_encoding *enc;
+  char *s, *send, *t;
+  int ascompat, cr;
   raise_arg_err_unless_string(str);
-  cps = cs_fetch_cps(set, &len);
+  orig_str_len = RSTRING_LEN(str);
-  orig_len = RSTRING_LEN(str);
-  if (orig_len < 1) // empty string, will never change
+  if (orig_str_len == 0)
   {
-    if (bang)
-    {
-      return Qnil;
-    }
-    return rb_str_dup(str);
+    return bang ? Qnil : str;
   }
-  new_str_buf = rb_str_buf_new(orig_len + 30); // len + margin
-  str_enc = rb_enc_get(str);
-  rb_enc_associate(new_str_buf, str_enc);
-  rb_str_modify(new_str_buf);
-  ENC_CODERANGE_SET(new_str_buf, rb_enc_asciicompat(str_enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
-  ptr = RSTRING_PTR(str);
-  end = RSTRING_END(str);
+  if (!bang)
+  {
+    str = rb_str_dup(str);
+  }
-  if (single_byte_optimizable(str))
+  cps = cs_fetch_cps(set, &cs_len);
+  rb_str_modify(str);
+  enc = rb_enc_get(str);
+  ascompat = rb_enc_asciicompat(enc);
+  s = t = RSTRING_PTR(str);
+  send = RSTRING_END(str);
+  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
+  while (s < send)
   {
-    while (ptr < end)
+    unsigned int c;
+    int clen;
+    if (ascompat && (c = *(unsigned char *)s) < 0x80)
     {
-      str_cp = *ptr & 0xff;
-      if ((!tst_cp(cps, len, str_cp)) == delete)
+      if (tst_cp(cps, cs_len, c) != delete)
       {
-        cs_str_buf_cat(new_str_buf, ptr, 1);
+        if (t != s)
+          *t = c;
+        t++;
       }
-      ptr++;
+      s++;
     }
-  }
-  else // likely to be multibyte string
-  {
-    while (ptr < end)
+    else
     {
-      str_cp = rb_enc_codepoint_len(ptr, end, &cp_len, str_enc);
-      if ((!tst_cp(cps, len, str_cp)) == delete)
+      c = rb_enc_codepoint_len(s, send, &clen, enc);
+      if (tst_cp(cps, cs_len, c) != delete)
       {
-        cs_str_buf_cat(new_str_buf, ptr, cp_len);
+        if (t != s)
+          rb_enc_mbcput(c, t, enc);
+        t += clen;
+        if (cr == ENC_CODERANGE_7BIT)
+          cr = ENC_CODERANGE_VALID;
       }
-      ptr += cp_len;
+      s += clen;
     }
   }
-  cs_str_buf_terminate(new_str_buf, str_enc);
+  rb_str_set_len(str, t - RSTRING_PTR(str));
+  ENC_CODERANGE_SET(str, cr);
-  if (bang)
-  {
-    if (RSTRING_LEN(new_str_buf) == (long)orig_len) // string unchanged
-    {
-      return Qnil;
-    }
-    rb_str_shared_replace(str, new_str_buf);
-  }
-  else
+  if (bang && (RSTRING_LEN(str) == (long)orig_str_len)) // string unchanged
   {
-    RB_OBJ_WRITE(new_str_buf, &(RBASIC(new_str_buf))->klass, rb_obj_class(str));
-    str = new_str_buf;
+    return Qnil;
   }
   return str;
@@ -1284,6 +1249,10 @@ cs_method_allocated_length(VALUE self)
 void Init_character_set()
 {
+#ifdef HAVE_RB_EXT_RACTOR_SAFE
+  rb_ext_ractor_safe(true);
+#endif
   VALUE cs = rb_define_class("CharacterSet", rb_cObject);
   rb_define_alloc_func(cs, cs_method_allocate);
@@ -1338,7 +1307,7 @@ void Init_character_set()
   // `CharacterSet`-specific methods
   rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
-  rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
+  rb_define_singleton_method(cs, "of_string", cs_class_method_of_string, 1);
   rb_define_method(cs, "ranges", cs_method_ranges, 0);
   rb_define_method(cs, "sample", cs_method_sample, -1);