RubyGems - regexp_parser - Versions diffs - 2.0.2 → 2.2.0 - Mend

regexp_parser 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +54 -0
data/Gemfile +5 -1
data/README.md +15 -21
data/Rakefile +11 -17
data/lib/regexp_parser/error.rb +4 -0
data/lib/regexp_parser/expression/base.rb +123 -0
data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
data/lib/regexp_parser/expression/classes/group.rb +6 -1
data/lib/regexp_parser/expression/classes/literal.rb +1 -5
data/lib/regexp_parser/expression/classes/property.rb +1 -3
data/lib/regexp_parser/expression/classes/root.rb +0 -1
data/lib/regexp_parser/expression/classes/type.rb +0 -2
data/lib/regexp_parser/expression/quantifier.rb +2 -2
data/lib/regexp_parser/expression/sequence.rb +3 -10
data/lib/regexp_parser/expression/subexpression.rb +1 -2
data/lib/regexp_parser/expression.rb +7 -130
data/lib/regexp_parser/lexer.rb +7 -5
data/lib/regexp_parser/parser.rb +282 -334
data/lib/regexp_parser/scanner/properties/long.yml +13 -0
data/lib/regexp_parser/scanner/properties/short.yml +9 -1
data/lib/regexp_parser/scanner/scanner.rl +64 -87
data/lib/regexp_parser/scanner.rb +1024 -1073
data/lib/regexp_parser/syntax/any.rb +2 -4
data/lib/regexp_parser/syntax/base.rb +10 -10
data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
data/lib/regexp_parser/syntax/token/escape.rb +31 -0
data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
data/lib/regexp_parser/syntax/token.rb +45 -0
data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
data/lib/regexp_parser/syntax.rb +8 -6
data/lib/regexp_parser/token.rb +9 -20
data/lib/regexp_parser/version.rb +1 -1
data/lib/regexp_parser.rb +0 -2
data/spec/expression/clone_spec.rb +36 -4
data/spec/expression/free_space_spec.rb +2 -2
data/spec/expression/methods/match_length_spec.rb +2 -2
data/spec/lexer/nesting_spec.rb +2 -2
data/spec/lexer/refcalls_spec.rb +5 -0
data/spec/parser/all_spec.rb +2 -2
data/spec/parser/escapes_spec.rb +43 -31
data/spec/parser/properties_spec.rb +6 -4
data/spec/parser/refcalls_spec.rb +5 -0
data/spec/parser/set/ranges_spec.rb +26 -16
data/spec/scanner/escapes_spec.rb +29 -20
data/spec/scanner/refcalls_spec.rb +19 -0
data/spec/scanner/sets_spec.rb +66 -23
data/spec/spec_helper.rb +13 -1
data/spec/support/capturing_stderr.rb +9 -0
data/spec/syntax/versions/1.8.6_spec.rb +2 -2
data/spec/syntax/versions/2.0.0_spec.rb +2 -2
data/spec/syntax/versions/aliases_spec.rb +1 -0
metadata +27 -26
data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
data/lib/regexp_parser/syntax/tokens.rb +0 -45
data/spec/support/runner.rb +0 -42
data/spec/support/warning_extractor.rb +0 -60

data/lib/regexp_parser/syntax/token.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# Define the base module and the simplest of tokens.
+module Regexp::Syntax
+  module Token
+    Map = {}
+    module Literal
+      All = %i[literal]
+      Type = :literal
+    end
+    module FreeSpace
+      All  = %i[comment whitespace]
+      Type = :free_space
+    end
+    Map[FreeSpace::Type] = FreeSpace::All
+    Map[Literal::Type]   = Literal::All
+  end
+end
+# Load all the token files, they will populate the Map constant.
+require 'regexp_parser/syntax/token/anchor'
+require 'regexp_parser/syntax/token/assertion'
+require 'regexp_parser/syntax/token/backreference'
+require 'regexp_parser/syntax/token/posix_class'
+require 'regexp_parser/syntax/token/character_set'
+require 'regexp_parser/syntax/token/character_type'
+require 'regexp_parser/syntax/token/conditional'
+require 'regexp_parser/syntax/token/escape'
+require 'regexp_parser/syntax/token/group'
+require 'regexp_parser/syntax/token/keep'
+require 'regexp_parser/syntax/token/meta'
+require 'regexp_parser/syntax/token/quantifier'
+require 'regexp_parser/syntax/token/unicode_property'
+# After loading all the tokens the map is full. Extract all tokens and types
+# into the All and Types constants.
+module Regexp::Syntax
+  module Token
+    All   = Map.values.flatten.uniq.sort.freeze
+    Types = Map.keys.freeze
+  end
+end

data/lib/regexp_parser/syntax/version_lookup.rb CHANGED Viewed

@@ -3,13 +3,13 @@ module Regexp::Syntax
   VERSION_REGEXP = /#{VERSION_FORMAT}/
   VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
-  class InvalidVersionNameError < SyntaxError
+  class InvalidVersionNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
     end
   end
-  class UnknownSyntaxNameError < SyntaxError
+  class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Unknown syntax name '#{name}'."
     end

data/lib/regexp_parser/syntax/versions/1.8.6.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Regexp::Syntax
       implements :anchor, Anchor::All
       implements :assertion, Assertion::Lookahead
-      implements :backref, [:number]
+      implements :backref, Backreference::Plain
       implements :posixclass, PosixClass::Standard
       implements :group, Group::All
       implements :meta, Meta::Extended

data/lib/regexp_parser/syntax/versions/3.1.0.rb ADDED Viewed

@@ -0,0 +1,10 @@
+module Regexp::Syntax
+  class V3_1_0 < Regexp::Syntax::V2_6_3
+    def initialize
+      super
+      implements :property,    UnicodeProperty::V3_1_0
+      implements :nonproperty, UnicodeProperty::V3_1_0
+    end
+  end
+end

data/lib/regexp_parser/syntax.rb CHANGED Viewed

@@ -1,9 +1,11 @@
-require File.expand_path('../syntax/tokens', __FILE__)
-require File.expand_path('../syntax/base', __FILE__)
-require File.expand_path('../syntax/any', __FILE__)
-require File.expand_path('../syntax/version_lookup', __FILE__)
-require File.expand_path('../syntax/versions', __FILE__)
+require 'regexp_parser/error'
 module Regexp::Syntax
-  class SyntaxError < StandardError; end
+  class SyntaxError < Regexp::Parser::Error; end
 end
+require_relative 'syntax/token'
+require_relative 'syntax/base'
+require_relative 'syntax/any'
+require_relative 'syntax/version_lookup'
+require_relative 'syntax/versions'

data/lib/regexp_parser/token.rb CHANGED Viewed

@@ -1,14 +1,13 @@
 class Regexp
-  TOKEN_KEYS = [
-    :type,
-    :token,
-    :text,
-    :ts,
-    :te,
-    :level,
-    :set_level,
-    :conditional_level
+  TOKEN_KEYS = %i[
+    type
+    token
+    text
+    ts
+    te
+    level
+    set_level
+    conditional_level
   ].freeze
   Token = Struct.new(*TOKEN_KEYS) do
@@ -21,15 +20,5 @@ class Regexp
     def length
       te - ts
     end
-    if RUBY_VERSION < '2.0.0'
-      def to_h
-        members.inject({}) do |hash, member|
-          hash[member.to_sym] = self[member]
-          hash
-        end
-      end
-    end
   end
 end

data/lib/regexp_parser/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 class Regexp
   class Parser
-    VERSION = '2.0.2'
+    VERSION = '2.2.0'
   end
 end

data/lib/regexp_parser.rb CHANGED Viewed

@@ -1,5 +1,3 @@
-# encoding: utf-8
 require 'regexp_parser/version'
 require 'regexp_parser/token'
 require 'regexp_parser/scanner'

data/spec/expression/clone_spec.rb CHANGED Viewed

@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
     expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
     # regression test
-    expect { root_2.clone }.not_to change { root_2.quantifier.object_id }
-    expect { root_2.clone }.not_to change { root_2.quantifier.text.object_id }
+    expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
+    expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
   end
   specify('Subexpression#clone') do
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
     end
     # regression test
-    expect { root.clone }.not_to change { root.expressions.object_id }
+    expect { root.clone }.not_to(change { root.expressions.object_id })
   end
   specify('Group::Named#clone') do
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
     end
     # regression test
-    expect { root_1.clone }.not_to change { root_1.name.object_id }
+    expect { root_1.clone }.not_to(change { root_1.name.object_id })
+  end
+  specify('Group::Options#clone') do
+    root = RP.parse('foo(?i)bar')
+    copy = root.clone
+    expect(copy.to_s).to eq root.to_s
+    root_1 = root[1]
+    copy_1 = copy[1]
+    expect(root_1.option_changes).to eq copy_1.option_changes
+    expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
+    # regression test
+    expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
+  end
+  specify('Backreference::Base#clone') do
+    root = RP.parse('(foo)\1')
+    copy = root.clone
+    expect(copy.to_s).to eq root.to_s
+    root_1 = root[1]
+    copy_1 = copy[1]
+    expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
+    expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
+    # regression test
+    expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
   end
   specify('Sequence#clone') do

data/spec/expression/free_space_spec.rb CHANGED Viewed

@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
     space = root[0]
     expect(space).to be_instance_of(FreeSpace::WhiteSpace)
-    expect { space.quantify(:dummy, '#') }.to raise_error(RuntimeError)
+    expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
   end
   specify('comment quantify raises error') do
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
     comment = root[3]
     expect(comment).to be_instance_of(FreeSpace::Comment)
-    expect { comment.quantify(:dummy, '#') }.to raise_error(RuntimeError)
+    expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
   end
 end

data/spec/expression/methods/match_length_spec.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 require 'spec_helper'
-RSpec.describe(Regexp::MatchLength) do
-  ML = described_class
+ML = Regexp::MatchLength
+RSpec.describe(Regexp::MatchLength) do
   specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
   specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
   specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }

data/spec/lexer/nesting_spec.rb CHANGED Viewed

@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
     4 => [:literal,     :literal,       'e',      4,  5, 0, 1, 0],
     5 => [:set,         :close,         ']',      5,  6, 0, 0, 0]
-  include_examples 'lex', /[[:word:]&&[^c]z]/,
+  include_examples 'lex', '[[:word:]&&[^c]z]',
     0 => [:set,         :open,          '[',          0,  1, 0, 0, 0],
     1 => [:posixclass,  :word,          '[:word:]',   1,  9, 0, 1, 0],
     2 => [:set,         :intersection,  '&&',         9, 11, 0, 1, 0],
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
     7 => [:literal,     :literal,       'z',         15, 16, 0, 1, 0],
     8 => [:set,         :close,         ']',         16, 17, 0, 0, 0]
-  include_examples 'lex', /[\p{word}&&[^c]z]/,
+  include_examples 'lex', '[\p{word}&&[^c]z]',
     0 => [:set,         :open,          '[',          0,  1, 0, 0, 0],
     1 => [:property,    :word,          '\p{word}',   1,  9, 0, 1, 0],
     2 => [:set,         :intersection,  '&&',         9, 11, 0, 1, 0],

data/spec/lexer/refcalls_spec.rb CHANGED Viewed

@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
   include_examples 'lex', "(abc)\\g'1'",
     3 => [:backref, :number_call,           "\\g'1'",     5, 10, 0, 0, 0]
+  include_examples 'lex', '\g<0>',
+    0 => [:backref, :number_call,           '\g<0>',      0,  5, 0, 0, 0]
+  include_examples 'lex', "\\g'0'",
+    0 => [:backref, :number_call,           "\\g'0'",     0,  5, 0, 0, 0]
   include_examples 'lex', '(abc)\g<-1>',
     3 => [:backref, :number_rel_call,       '\g<-1>',     5, 11, 0, 0, 0]
   include_examples 'lex', "(abc)\\g'-1'",

data/spec/parser/all_spec.rb CHANGED Viewed

@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
   end
   specify('parse no quantifier target raises error') do
-    expect { RP.parse('?abc') }.to raise_error(ArgumentError)
+    expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
   end
   specify('parse sequence no quantifier target raises error') do
-    expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
+    expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
   end
 end

data/spec/parser/escapes_spec.rb CHANGED Viewed

@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
     expect { root[5].codepoint }.to raise_error(/#codepoints/)
   end
+  # Meta/control escapes
+  #
+  # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
+  # escapes can only be set with the Regexp::new constructor.
+  # In Regexp literals, these escapes are now pre-processed to hex escapes.
+  #
+  # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
+  def parse_meta_control(regexp_body)
+    regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
+    RP.parse(regexp)
+  end
   specify('parse escape control sequence lower') do
-    root = RP.parse(/a\\\c2b/)
+    root = parse_meta_control('a\\\\\c2b')
     expect(root[2]).to be_instance_of(EscapeSequence::Control)
     expect(root[2].text).to eq '\\c2'
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
   end
   specify('parse escape control sequence upper') do
-    root = RP.parse(/\d\\\C-C\w/)
+    root = parse_meta_control('\d\C-C\w')
-    expect(root[2]).to be_instance_of(EscapeSequence::Control)
-    expect(root[2].text).to eq '\\C-C'
-    expect(root[2].char).to eq "\x03"
-    expect(root[2].codepoint).to eq 3
+    expect(root[1]).to be_instance_of(EscapeSequence::Control)
+    expect(root[1].text).to eq '\\C-C'
+    expect(root[1].char).to eq "\x03"
+    expect(root[1].codepoint).to eq 3
   end
   specify('parse escape meta sequence') do
-    root = RP.parse(/\Z\\\M-Z/n)
+    root = parse_meta_control('\Z\M-Z')
-    expect(root[2]).to be_instance_of(EscapeSequence::Meta)
-    expect(root[2].text).to eq '\\M-Z'
-    expect(root[2].char).to eq "\u00DA"
-    expect(root[2].codepoint).to eq 218
+    expect(root[1]).to be_instance_of(EscapeSequence::Meta)
+    expect(root[1].text).to eq '\\M-Z'
+    expect(root[1].char).to eq "\u00DA"
+    expect(root[1].codepoint).to eq 218
   end
   specify('parse escape meta control sequence') do
-    root = RP.parse(/\A\\\M-\C-X/n)
+    root = parse_meta_control('\A\M-\C-X')
-    expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
-    expect(root[2].text).to eq '\\M-\\C-X'
-    expect(root[2].char).to eq "\u0098"
-    expect(root[2].codepoint).to eq 152
+    expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
+    expect(root[1].text).to eq '\\M-\\C-X'
+    expect(root[1].char).to eq "\u0098"
+    expect(root[1].codepoint).to eq 152
   end
   specify('parse lower c meta control sequence') do
-    root = RP.parse(/\A\\\M-\cX/n)
+    root = parse_meta_control('\A\M-\cX')
-    expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
-    expect(root[2].text).to eq '\\M-\\cX'
-    expect(root[2].char).to eq "\u0098"
-    expect(root[2].codepoint).to eq 152
+    expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
+    expect(root[1].text).to eq '\\M-\\cX'
+    expect(root[1].char).to eq "\u0098"
+    expect(root[1].codepoint).to eq 152
   end
   specify('parse escape reverse meta control sequence') do
-    root = RP.parse(/\A\\\C-\M-X/n)
+    root = parse_meta_control('\A\C-\M-X')
-    expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
-    expect(root[2].text).to eq '\\C-\\M-X'
-    expect(root[2].char).to eq "\u0098"
-    expect(root[2].codepoint).to eq 152
+    expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
+    expect(root[1].text).to eq '\\C-\\M-X'
+    expect(root[1].char).to eq "\u0098"
+    expect(root[1].codepoint).to eq 152
   end
   specify('parse escape reverse lower c meta control sequence') do
-    root = RP.parse(/\A\\\c\M-X/n)
+    root = parse_meta_control('\A\c\M-X')
-    expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
-    expect(root[2].text).to eq '\\c\\M-X'
-    expect(root[2].char).to eq "\u0098"
-    expect(root[2].codepoint).to eq 152
+    expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
+    expect(root[1].text).to eq '\\c\\M-X'
+    expect(root[1].char).to eq "\u0098"
+    expect(root[1].codepoint).to eq 152
   end
 end

data/spec/parser/properties_spec.rb CHANGED Viewed

@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
     end
   end
-  specify('parse all properties of current ruby') do
-    unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
-      RP.parse("\\p{#{prop}}") rescue false
+  if ruby_version_at_least('2.7.0')
+    specify('parse all properties of current ruby') do
+      unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
+        RP.parse("\\p{#{prop}}") rescue false
+      end
+      expect(unsupported).to be_empty
     end
-    expect(unsupported).to be_empty
   end
   specify('parse property negative') do

data/spec/parser/refcalls_spec.rb CHANGED Viewed

@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
   include_examples 'parse', /(abc)\g'1'/,
     1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
+  include_examples 'parse', '\g<0>',
+    0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
+  include_examples 'parse', "\\g'0'",
+    0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
   include_examples 'parse', /(abc)\g<-1>/,
     1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
   include_examples 'parse', /(abc)\g'-1'/,

data/spec/parser/set/ranges_spec.rb CHANGED Viewed

@@ -1,6 +1,10 @@
 require 'spec_helper'
 RSpec.describe('CharacterSet::Range parsing') do
+  # Some edge-case patterns are evaluated with #match to make sure that
+  # their behavior still reflects the way they are parsed.
+  # #capturing_stderr is used to skip any warnings generated by this.
   specify('parse set range') do
     root = RP.parse('[a-z]')
     set = root[0]
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(range.first).to be_instance_of(Literal)
     expect(range.last.to_s).to eq 'z'
     expect(range.last).to be_instance_of(Literal)
-    expect(set).to match 'm'
+    capturing_stderr { expect(set).to match 'm' }
   end
   specify('parse set range hex') do
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(range.first).to be_instance_of(EscapeSequence::Hex)
     expect(range.last.to_s).to eq '\\x22'
     expect(range.last).to be_instance_of(EscapeSequence::Hex)
-    expect(set).to match "\x11"
+    capturing_stderr { expect(set).to match "\x11" }
   end
   specify('parse set range unicode') do
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
     expect(range.last.to_s).to eq '\\u1234'
     expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
-    expect(set).to match '\\u600'
+    capturing_stderr { expect(set).to match '\\u600' }
   end
   specify('parse set range edge case leading dash') do
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(set.count).to eq 1
     expect(range.count).to eq 2
-    expect(set).to match 'a'
+    capturing_stderr { expect(set).to match 'a' }
   end
   specify('parse set range edge case trailing dash') do
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(set.count).to eq 1
     expect(range.count).to eq 2
-    expect(set).to match '$'
+    capturing_stderr { expect(set).to match '$' }
   end
   specify('parse set range edge case leading negate') do
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
     set = root[0]
     expect(set.count).to eq 2
-    expect(set).to     match 'a'
-    expect(set).not_to match 'z'
+    capturing_stderr do
+      expect(set).to     match 'a'
+      expect(set).not_to match 'z'
+    end
   end
   specify('parse set range edge case trailing negate') do
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(set.count).to eq 1
     expect(range.count).to eq 2
-    expect(set).to match '$'
+    capturing_stderr { expect(set).to match '$' }
   end
   specify('parse set range edge case leading intersection') do
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(set.count).to eq 1
     expect(set.first.last.to_s).to eq '-bc'
-    expect(set).to     match '-'
-    expect(set).to     match 'b'
-    expect(set).not_to match 'a'
-    expect(set).not_to match 'c'
+    capturing_stderr do
+      expect(set).to     match '-'
+      expect(set).to     match 'b'
+      expect(set).not_to match 'a'
+      expect(set).not_to match 'c'
+    end
   end
   specify('parse set range edge case trailing intersection') do
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(set.count).to eq 1
     expect(set.first.first.to_s).to eq 'bc-'
-    expect(set).to     match '-'
-    expect(set).to     match 'b'
-    expect(set).not_to match 'a'
-    expect(set).not_to match 'c'
+    capturing_stderr do
+      expect(set).to     match '-'
+      expect(set).to     match 'b'
+      expect(set).not_to match 'a'
+      expect(set).not_to match 'c'
+    end
   end
 end

data/spec/scanner/escapes_spec.rb CHANGED Viewed

@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
   include_examples 'scan', /c\at/,            1 => [:escape,  :bell,             '\a',             1,  3]
   # not an escape outside a character set
-  include_examples 'scan', /c\bt/,            1 => [:anchor,  :word_boundary,    '\b',             1, 3]
+  include_examples 'scan', /c\bt/,            1 => [:anchor,  :word_boundary,    '\b',             1,  3]
   include_examples 'scan', /c\ft/,            1 => [:escape,  :form_feed,        '\f',             1,  3]
   include_examples 'scan', /c\nt/,            1 => [:escape,  :newline,          '\n',             1,  3]
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
   include_examples 'scan', 'a\u{640 0641}c',  1 => [:escape,  :codepoint_list,   '\u{640 0641}',   1,  13]
   include_examples 'scan', 'a\u{10FFFF}c',    1 => [:escape,  :codepoint_list,   '\u{10FFFF}',     1,  11]
-  include_examples 'scan', /a\cBc/,           1 => [:escape,  :control,          '\cB',            1,  4]
-  include_examples 'scan', /a\c^c/,           1 => [:escape,  :control,          '\c^',            1,  4]
-  include_examples 'scan', /a\c\n/,           1 => [:escape,  :control,          '\c\n',           1,  5]
-  include_examples 'scan', /a\c\\b/,          1 => [:escape,  :control,          '\c\\\\',         1,  5]
-  include_examples 'scan', /a\C-bc/,          1 => [:escape,  :control,          '\C-b',           1,  5]
-  include_examples 'scan', /a\C-^b/,          1 => [:escape,  :control,          '\C-^',           1,  5]
-  include_examples 'scan', /a\C-\nb/,         1 => [:escape,  :control,          '\C-\n',          1,  6]
-  include_examples 'scan', /a\C-\\b/,         1 => [:escape,  :control,          '\C-\\\\',        1,  6]
-  include_examples 'scan', /a\c\M-Bc/n,       1 => [:escape,  :control,          '\c\M-B',         1,  7]
-  include_examples 'scan', /a\C-\M-Bc/n,      1 => [:escape,  :control,          '\C-\M-B',        1,  8]
-  include_examples 'scan', /a\M-Bc/n,         1 => [:escape,  :meta_sequence,    '\M-B',           1,  5]
-  include_examples 'scan', /a\M-\cBc/n,       1 => [:escape,  :meta_sequence,    '\M-\cB',         1,  7]
-  include_examples 'scan', /a\M-\c^/n,        1 => [:escape,  :meta_sequence,    '\M-\c^',         1,  7]
-  include_examples 'scan', /a\M-\c\n/n,       1 => [:escape,  :meta_sequence,    '\M-\c\n',        1,  8]
-  include_examples 'scan', /a\M-\c\\/n,       1 => [:escape,  :meta_sequence,    '\M-\c\\\\',      1,  8]
-  include_examples 'scan', /a\M-\C-Bc/n,      1 => [:escape,  :meta_sequence,    '\M-\C-B',        1,  8]
-  include_examples 'scan', /a\M-\C-\\/n,      1 => [:escape,  :meta_sequence,    '\M-\C-\\\\',     1,  9]
   include_examples 'scan', 'ab\\\xcd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]
   include_examples 'scan', 'ab\\\0cd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]
   include_examples 'scan', 'ab\\\Kcd',        1 => [:escape,  :backslash,        '\\\\',           2,  4]
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
   include_examples 'scan', 'ab\^cd',          1 => [:escape,  :bol,              '\^',             2,  4]
   include_examples 'scan', 'ab\$cd',          1 => [:escape,  :eol,              '\$',             2,  4]
   include_examples 'scan', 'ab\[cd',          1 => [:escape,  :set_open,         '\[',             2,  4]
+  # Meta/control escapes
+  #
+  # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
+  # escapes can only be set with the Regexp::new constructor.
+  # In Regexp literals, these escapes are now pre-processed to hex escapes.
+  #
+  # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
+  n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
+  include_examples 'scan', 'a\cBc',           1 => [:escape,  :control,          '\cB',            1,  4]
+  include_examples 'scan', 'a\c^c',           1 => [:escape,  :control,          '\c^',            1,  4]
+  include_examples 'scan', 'a\c\n',           1 => [:escape,  :control,          '\c\n',           1,  5]
+  include_examples 'scan', 'a\c\\\\b',        1 => [:escape,  :control,          '\c\\\\',         1,  5]
+  include_examples 'scan', 'a\C-bc',          1 => [:escape,  :control,          '\C-b',           1,  5]
+  include_examples 'scan', 'a\C-^b',          1 => [:escape,  :control,          '\C-^',           1,  5]
+  include_examples 'scan', 'a\C-\nb',         1 => [:escape,  :control,          '\C-\n',          1,  6]
+  include_examples 'scan', 'a\C-\\\\b',       1 => [:escape,  :control,          '\C-\\\\',        1,  6]
+  include_examples 'scan', n.('a\c\M-Bc'),    1 => [:escape,  :control,          '\c\M-B',         1,  7]
+  include_examples 'scan', n.('a\C-\M-Bc'),   1 => [:escape,  :control,          '\C-\M-B',        1,  8]
+  include_examples 'scan', n.('a\M-Bc'),      1 => [:escape,  :meta_sequence,    '\M-B',           1,  5]
+  include_examples 'scan', n.('a\M-\cBc'),    1 => [:escape,  :meta_sequence,    '\M-\cB',         1,  7]
+  include_examples 'scan', n.('a\M-\c^'),     1 => [:escape,  :meta_sequence,    '\M-\c^',         1,  7]
+  include_examples 'scan', n.('a\M-\c\n'),    1 => [:escape,  :meta_sequence,    '\M-\c\n',        1,  8]
+  include_examples 'scan', n.('a\M-\c\\\\'),  1 => [:escape,  :meta_sequence,    '\M-\c\\\\',      1,  8]
+  include_examples 'scan', n.('a\M-\C-Bc'),   1 => [:escape,  :meta_sequence,    '\M-\C-B',        1,  8]
+  include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape,  :meta_sequence,    '\M-\C-\\\\',     1,  9]
 end