RubyGems - regexp_parser - Versions diffs - 1.8.2 → 2.0.3 - Mend

regexp_parser 1.8.2 → 2.0.3

Files changed (41)

checksums.yaml +4 -4
data/CHANGELOG.md +62 -0
data/Gemfile +1 -0
data/README.md +1 -4
data/Rakefile +2 -2
data/lib/regexp_parser/expression.rb +4 -17
data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
data/lib/regexp_parser/expression/classes/group.rb +22 -2
data/lib/regexp_parser/expression/classes/root.rb +4 -16
data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
data/lib/regexp_parser/expression/quantifier.rb +9 -0
data/lib/regexp_parser/expression/sequence.rb +0 -10
data/lib/regexp_parser/lexer.rb +2 -2
data/lib/regexp_parser/parser.rb +27 -2
data/lib/regexp_parser/scanner.rb +1194 -1272
data/lib/regexp_parser/scanner/char_type.rl +11 -11
data/lib/regexp_parser/scanner/property.rl +2 -2
data/lib/regexp_parser/scanner/scanner.rl +178 -186
data/lib/regexp_parser/syntax.rb +4 -4
data/lib/regexp_parser/syntax/any.rb +2 -2
data/lib/regexp_parser/syntax/base.rb +1 -1
data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
data/lib/regexp_parser/version.rb +1 -1
data/spec/expression/base_spec.rb +10 -0
data/spec/expression/subexpression_spec.rb +1 -1
data/spec/expression/to_s_spec.rb +39 -31
data/spec/lexer/literals_spec.rb +24 -49
data/spec/parser/errors_spec.rb +1 -1
data/spec/parser/escapes_spec.rb +1 -1
data/spec/parser/quantifiers_spec.rb +16 -0
data/spec/parser/set/ranges_spec.rb +3 -3
data/spec/scanner/escapes_spec.rb +7 -0
data/spec/scanner/groups_spec.rb +10 -1
data/spec/scanner/literals_spec.rb +28 -38
data/spec/scanner/quantifiers_spec.rb +18 -13
data/spec/scanner/sets_spec.rb +23 -5
data/spec/spec_helper.rb +1 -0
metadata +3 -7
data/spec/expression/root_spec.rb +0 -9
data/spec/expression/sequence_spec.rb +0 -9

data/lib/regexp_parser/syntax.rb CHANGED

@@ -1,9 +1,9 @@
+module Regexp::Syntax
+  class SyntaxError < StandardError; end
+end
 require File.expand_path('../syntax/tokens', __FILE__)
 require File.expand_path('../syntax/base', __FILE__)
 require File.expand_path('../syntax/any', __FILE__)
 require File.expand_path('../syntax/version_lookup', __FILE__)
 require File.expand_path('../syntax/versions', __FILE__)
-module Regexp::Syntax
-  class SyntaxError < StandardError; end
-end

data/lib/regexp_parser/syntax/any.rb CHANGED

@@ -8,8 +8,8 @@ module Regexp::Syntax
       @implements = { :* => [:*] }
     end
-    def implements?(type, token) true end
-    def implements!(type, token) true end
+    def implements?(_type, _token) true end
+    def implements!(_type, _token) true end
   end
 end

data/lib/regexp_parser/syntax/base.rb CHANGED

@@ -1,7 +1,7 @@
 require 'set'
 module Regexp::Syntax
-  class NotImplementedError < SyntaxError
+  class NotImplementedError < Regexp::Syntax::SyntaxError
     def initialize(syntax, type, token)
       super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
     end

data/lib/regexp_parser/syntax/version_lookup.rb CHANGED

@@ -3,13 +3,13 @@ module Regexp::Syntax
   VERSION_REGEXP = /#{VERSION_FORMAT}/
   VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
-  class InvalidVersionNameError < SyntaxError
+  class InvalidVersionNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
     end
   end
-  class UnknownSyntaxNameError < SyntaxError
+  class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
     def initialize(name)
       super "Unknown syntax name '#{name}'."
     end

data/lib/regexp_parser/version.rb CHANGED

@@ -1,5 +1,5 @@
 class Regexp
   class Parser
-    VERSION = '1.8.2'
+    VERSION = '2.0.3'
   end
 end

data/spec/expression/base_spec.rb CHANGED

@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
     expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
     expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
   end
+  specify('#base_length') do
+    expect(RP.parse(/(aa)/)[0].base_length).to eq 4
+    expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
+  end
+  specify('#full_length') do
+    expect(RP.parse(/(aa)/)[0].full_length).to eq 4
+    expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
+  end
 end

data/spec/expression/subexpression_spec.rb CHANGED

@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
     }
     root.each_expression do |exp|
-      next unless expected_nesting_level = tests.delete(exp.to_s)
+      next unless (expected_nesting_level = tests.delete(exp.to_s))
       expect(expected_nesting_level).to eq exp.nesting_level
     end

data/spec/expression/to_s_spec.rb CHANGED

@@ -1,58 +1,50 @@
 require 'spec_helper'
 RSpec.describe('Expression#to_s') do
-  specify('literal alternation') do
-    pattern = 'abcd|ghij|klmn|pqur'
+  def parse_frozen(pattern, ruby_version = nil)
+    IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
+  end
+  def expect_round_trip(pattern, ruby_version = nil)
+    parsed = parse_frozen(pattern, ruby_version)
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect(parsed.to_s).to eql(pattern)
   end
-  specify('quantified alternations') do
-    pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
+  specify('literal alternation') do
+    expect_round_trip('abcd|ghij|klmn|pqur')
+  end
-    expect(RP.parse(pattern).to_s).to eq pattern
+  specify('quantified alternations') do
+    expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
   end
   specify('quantified sets') do
-    pattern = '[abc]+|[^def]{3,6}'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('[abc]+|[^def]{3,6}')
   end
   specify('property sets') do
-    pattern = '[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+'
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
   end
   specify('groups') do
-    pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
   end
   specify('assertions') do
-    pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
-    expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
+    expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
   end
   specify('comments') do
-    pattern = '(?#start)a(?#middle)b(?#end)'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(?#start)a(?#middle)b(?#end)')
   end
   specify('options') do
-    pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
   end
   specify('url') do
-    pattern = ('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
-    expect(RP.parse(pattern).to_s).to eq pattern
+    expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
   end
   specify('multiline source') do
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
           \z
         /x
-    expect(RP.parse(multiline).to_s).to eq multiline.source
+    expect(parse_frozen(multiline).to_s).to eql(multiline.source)
   end
   specify('multiline #to_s') do
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
           \z
         /x
-    expect(RP.parse(multiline.to_s).to_s).to eq multiline.to_s
+    expect_round_trip(multiline.to_s)
   end
   # Free spacing expressions that use spaces between quantifiers and their
@@ -93,8 +85,24 @@ RSpec.describe('Expression#to_s') do
         /x
     str = 'bbbcged'
-    root = RP.parse(multiline)
+    root = parse_frozen(multiline)
+    expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
+  end
+  # special case: implicit groups used for chained quantifiers produce no parens
+  specify 'chained quantifiers #to_s' do
+    pattern = /a+{1}{2}/
+    root = parse_frozen(pattern)
+    expect(root.to_s).to eql('a+{1}{2}')
+  end
-    expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
+  # regression test for https://github.com/ammar/regexp_parser/issues/74
+  specify('non-ascii comment') do
+    pattern = '(?x) 😋 # 😋'
+    root = RP.parse(pattern)
+    expect(root.last).to be_a(Regexp::Expression::Comment)
+    expect(root.last.to_s).to eql('# 😋')
+    expect(root.to_s).to eql(pattern)
   end
 end

data/spec/lexer/literals_spec.rb CHANGED

@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
     1 => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
     2 => [:quantifier,  :one_or_more,   '+',        2, 3, 0, 0, 0]
-  # 2 byte wide characters, Arabic
-  include_examples 'lex', 'ا',
-    0 => [:literal,     :literal,       'ا',        0, 2, 0, 0, 0]
-  include_examples 'lex', 'aاbبcت',
-    0 => [:literal,     :literal,       'aاbبcت',   0, 9, 0, 0, 0]
-  include_examples 'lex', 'aاbبت?',
-    0 => [:literal,     :literal,       'aاbب',     0, 6, 0, 0, 0],
-    1 => [:literal,     :literal,       'ت',        6, 8, 0, 0, 0],
-    2 => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0, 0]
-  include_examples 'lex', 'aا?bبcت+',
-    0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
-    1 => [:literal,     :literal,       'ا',        1, 3, 0, 0, 0],
-    2 => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0, 0],
-    3 => [:literal,     :literal,       'bبc',      4, 8, 0, 0, 0],
-    4 => [:literal,     :literal,       'ت',        8, 10, 0, 0, 0],
-    5 => [:quantifier,  :one_or_more,   '+',        10, 11, 0, 0, 0]
-  include_examples 'lex', 'a(اbب+)cت?',
-    0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
-    1 => [:group,       :capture,       '(',        1, 2, 0, 0, 0],
-    2 => [:literal,     :literal,       'اb',       2, 5, 1, 0, 0],
-    3 => [:literal,     :literal,       'ب',        5, 7, 1, 0, 0],
-    4 => [:quantifier,  :one_or_more,   '+',        7, 8, 1, 0, 0],
-    5 => [:group,       :close,         ')',        8, 9, 0, 0, 0],
-    6 => [:literal,     :literal,       'c',        9, 10, 0, 0, 0],
-    7 => [:literal,     :literal,       'ت',        10, 12, 0, 0, 0],
-    8 => [:quantifier,  :zero_or_one,   '?',        12, 13, 0, 0, 0]
+  # 2 byte wide characters
+  include_examples 'lex', 'äöü+',
+    0 => [:literal,     :literal,       'äö',       0, 2, 0, 0, 0],
+    1 => [:literal,     :literal,       'ü',        2, 3, 0, 0, 0],
+    2 => [:quantifier,  :one_or_more,   '+',        3, 4, 0, 0, 0]
   # 3 byte wide characters, Japanese
   include_examples 'lex', 'ab?れます+cd',
     0 => [:literal,     :literal,       'a',        0, 1, 0, 0, 0],
     1 => [:literal,     :literal,       'b',        1, 2, 0, 0, 0],
     2 => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0, 0],
-    3 => [:literal,     :literal,       'れま',     3, 9, 0, 0, 0],
-    4 => [:literal,     :literal,       'す',       9, 12, 0, 0, 0],
-    5 => [:quantifier,  :one_or_more,   '+',        12, 13, 0, 0, 0],
-    6 => [:literal,     :literal,       'cd',       13, 15, 0, 0, 0]
+    3 => [:literal,     :literal,       'れま',     3, 5, 0, 0, 0],
+    4 => [:literal,     :literal,       'す',       5, 6, 0, 0, 0],
+    5 => [:quantifier,  :one_or_more,   '+',        6, 7, 0, 0, 0],
+    6 => [:literal,     :literal,       'cd',       7, 9, 0, 0, 0]
   # 4 byte wide characters, Osmanya
   include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
-    0 => [:literal,     :literal,       '𐒀',        0, 4, 0, 0, 0],
-    1 => [:literal,     :literal,       '𐒁',        4, 8, 0, 0, 0],
-    2 => [:quantifier,  :zero_or_one,   '?',        8, 9, 0, 0, 0],
-    3 => [:literal,     :literal,       '𐒂a',       9, 14, 0, 0, 0],
-    4 => [:literal,     :literal,       'b',        14, 15, 0, 0, 0],
-    5 => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0, 0],
-    6 => [:literal,     :literal,       '𐒃',        16, 20, 0, 0, 0]
+    0 => [:literal,     :literal,       '𐒀',        0, 1, 0, 0, 0],
+    1 => [:literal,     :literal,       '𐒁',        1, 2, 0, 0, 0],
+    2 => [:quantifier,  :zero_or_one,   '?',        2, 3, 0, 0, 0],
+    3 => [:literal,     :literal,       '𐒂a',       3, 5, 0, 0, 0],
+    4 => [:literal,     :literal,       'b',        5, 6, 0, 0, 0],
+    5 => [:quantifier,  :one_or_more,   '+',        6, 7, 0, 0, 0],
+    6 => [:literal,     :literal,       '𐒃',        7, 8, 0, 0, 0]
   include_examples 'lex', 'mu𝄞?si*𝄫c+',
     0 => [:literal,     :literal,       'mu',       0, 2, 0, 0, 0],
-    1 => [:literal,     :literal,       '𝄞',        2, 6, 0, 0, 0],
-    2 => [:quantifier,  :zero_or_one,   '?',        6, 7, 0, 0, 0],
-    3 => [:literal,     :literal,       's',        7, 8, 0, 0, 0],
-    4 => [:literal,     :literal,       'i',        8, 9, 0, 0, 0],
-    5 => [:quantifier,  :zero_or_more,  '*',        9, 10, 0, 0, 0],
-    6 => [:literal,     :literal,       '𝄫',        10, 14, 0, 0, 0],
-    7 => [:literal,     :literal,       'c',        14, 15, 0, 0, 0],
-    8 => [:quantifier,  :one_or_more,   '+',        15, 16, 0, 0, 0]
+    1 => [:literal,     :literal,       '𝄞',        2, 3, 0, 0, 0],
+    2 => [:quantifier,  :zero_or_one,   '?',        3, 4, 0, 0, 0],
+    3 => [:literal,     :literal,       's',        4, 5, 0, 0, 0],
+    4 => [:literal,     :literal,       'i',        5, 6, 0, 0, 0],
+    5 => [:quantifier,  :zero_or_more,  '*',        6, 7, 0, 0, 0],
+    6 => [:literal,     :literal,       '𝄫',        7, 8, 0, 0, 0],
+    7 => [:literal,     :literal,       'c',        8, 9, 0, 0, 0],
+    8 => [:quantifier,  :one_or_more,   '+',        9, 10, 0, 0, 0]
   specify('lex single 2 byte char') do
     tokens = RL.lex("\u0627+")

data/spec/parser/errors_spec.rb CHANGED

@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
       .to raise_error(Regexp::Parser::UnknownTokenTypeError)
   end
-  RSpec.shared_examples 'UnknownTokenError' do |type, token|
+  RSpec.shared_examples 'UnknownTokenError' do |type|
     it "raises for unkown tokens of type #{type}" do
       expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
         .to raise_error(Regexp::Parser::UnknownTokenError)

data/spec/parser/escapes_spec.rb CHANGED

@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
   include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list,    EscapeSequence::CodepointList]
   include_examples 'parse', /a\u{10FFFF}/,   1 => [:escape, :codepoint_list,    EscapeSequence::CodepointList]
-    # hex escapes
+  # hex escapes
   include_examples 'parse', /a\xFF/n,        1 => [:escape, :hex,               EscapeSequence::Hex]
   # octal escapes

data/spec/parser/quantifiers_spec.rb CHANGED

@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
       expect(exp.quantifier.min).to eq min
       expect(exp.quantifier.max).to eq max
       expect(exp.quantifier.mode).to eq mode
+      expect(exp.quantifier.text).to eq text
     end
   end
@@ -37,6 +38,21 @@ RSpec.describe('Quantifier parsing') do
   include_examples 'quantifier', /a{4}+b/,   '{4}+',   :possessive, :interval,     4, 4
   include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval,     4, 4
+  # special case: exps with chained quantifiers are wrapped in implicit passive groups
+  include_examples 'parse', /a+{2}{3}/,
+    0 => [
+      :group, :passive, Group::Passive, implicit?: true, level: 0,
+      quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
+    ],
+    [0, 0] => [
+      :group, :passive, Group::Passive, implicit?: true, level: 1,
+      quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
+    ],
+    [0, 0, 0] => [
+      :literal, :literal, Literal, text: 'a', level: 2,
+      quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
+    ]
   specify('mode-checking methods') do
     exp = RP.parse(/a??/).first

data/spec/parser/set/ranges_spec.rb CHANGED

@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
   end
   specify('parse set range hex') do
-    root = RP.parse('[\\x00-\\x99]')
+    root = RP.parse('[\\x00-\\x22]')
     set = root[0]
     range = set[0]
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
     expect(range.count).to eq 2
     expect(range.first.to_s).to eq '\\x00'
     expect(range.first).to be_instance_of(EscapeSequence::Hex)
-    expect(range.last.to_s).to eq '\\x99'
+    expect(range.last.to_s).to eq '\\x22'
     expect(range.last).to be_instance_of(EscapeSequence::Hex)
-    expect(set).to match '\\x50'
+    expect(set).to match "\x11"
   end
   specify('parse set range unicode') do

data/spec/scanner/escapes_spec.rb CHANGED

@@ -11,7 +11,13 @@ RSpec.describe('Escape scanning') do
   include_examples 'scan', /c\tt/,            1 => [:escape,  :tab,              '\t',             1,  3]
   include_examples 'scan', /c\vt/,            1 => [:escape,  :vertical_tab,     '\v',             1,  3]
+  # ineffectual literal escapes
+  # these cause "Unknown escape" warnings in Ruby for ascii chars,
+  # and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
+  # In terms of matching, Ruby treats them both like non-escaped literals.
   include_examples 'scan', 'c\qt',            1 => [:escape,  :literal,          '\q',             1,  3]
+  include_examples 'scan', 'a\üc',            1 => [:escape, :literal,           '\ü',             1,  3]
+  include_examples 'scan', 'a\😋c',           1 => [:escape, :literal,            '\😋',            1,  3]
   # these incomplete ref/call sequences are treated as literal escapes by Ruby
   include_examples 'scan', 'c\gt',            1 => [:escape,  :literal,          '\g',             1,  3]
@@ -21,6 +27,7 @@ RSpec.describe('Escape scanning') do
   include_examples 'scan', 'a\0124',          1 => [:escape,  :octal,            '\012',           1,  5]
   include_examples 'scan', '\712+7',          0 => [:escape,  :octal,            '\712',           0,  4]
+  include_examples 'scan', 'a\xA',            1 => [:escape,  :hex,              '\xA',            1,  4]
   include_examples 'scan', 'a\x24c',          1 => [:escape,  :hex,              '\x24',           1,  5]
   include_examples 'scan', 'a\x0640c',        1 => [:escape,  :hex,              '\x06',           1,  5]

data/spec/scanner/groups_spec.rb CHANGED

@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
   include_examples 'scan', '(?>abc)',         0 => [:group,     :atomic,         '(?>',        0, 3]
   include_examples 'scan', '(abc)',           0 => [:group,     :capture,        '(',          0, 1]
+  # Named groups
+  # only names that start with a hyphen or digit (ascii or other) are invalid
   include_examples 'scan', '(?<name>abc)',    0 => [:group,     :named_ab,       '(?<name>',   0, 8]
   include_examples 'scan', "(?'name'abc)",    0 => [:group,     :named_sq,       "(?'name'",   0, 8]
   include_examples 'scan', '(?<name_1>abc)',  0 => [:group,     :named_ab,       '(?<name_1>', 0,10]
   include_examples 'scan', "(?'name_1'abc)",  0 => [:group,     :named_sq,       "(?'name_1'", 0,10]
+  include_examples 'scan', '(?<name-1>abc)',  0 => [:group,     :named_ab,       '(?<name-1>', 0,10]
+  include_examples 'scan', "(?'name-1'abc)",  0 => [:group,     :named_sq,       "(?'name-1'", 0,10]
+  include_examples 'scan', "(?<name'1>abc)",  0 => [:group,     :named_ab,       "(?<name'1>", 0,10]
+  include_examples 'scan', "(?'name>1'abc)",  0 => [:group,     :named_sq,       "(?'name>1'", 0,10]
+  include_examples 'scan', '(?<üüuuüü>abc)',  0 => [:group,     :named_ab,       '(?<üüuuüü>', 0,10]
+  include_examples 'scan', "(?'üüuuüü'abc)",  0 => [:group,     :named_sq,       "(?'üüuuüü'", 0,10]
+  include_examples 'scan', "(?<😋1234😋>abc)",  0 => [:group,     :named_ab,       "(?<😋1234😋>", 0,10]
+  include_examples 'scan', "(?'😋1234😋'abc)",  0 => [:group,     :named_sq,       "(?'😋1234😋'", 0,10]
   include_examples 'scan', '(?:abc)',         0 => [:group,     :passive,        '(?:',        0, 3]
   include_examples 'scan', '(?:)',            0 => [:group,     :passive,        '(?:',        0, 3]

data/spec/scanner/literals_spec.rb CHANGED

@@ -2,48 +2,38 @@ require 'spec_helper'
 RSpec.describe('UTF8 scanning') do
   # ascii, single byte characters
-  include_examples 'scan', 'a', 0              => [:literal,     :literal,       'a',        0, 1]
+  include_examples 'scan', 'a',
+    0 => [:literal,     :literal,       'a',        0, 1]
-  include_examples 'scan', 'ab+', 0            => [:literal,     :literal,       'ab',       0, 2]
-  include_examples 'scan', 'ab+', 1            => [:quantifier,  :one_or_more,   '+',        2, 3]
+  include_examples 'scan', 'ab+',
+    0 => [:literal,     :literal,       'ab',       0, 2],
+    1 => [:quantifier,  :one_or_more,   '+',        2, 3]
-  # 2 byte wide characters, Arabic
-  include_examples 'scan', 'aاbبcت', 0         => [:literal,     :literal,       'aاbبcت',   0, 9]
-  include_examples 'scan', 'aاbبت?', 0         => [:literal,     :literal,       'aاbبت',    0, 8]
-  include_examples 'scan', 'aاbبت?', 1         => [:quantifier,  :zero_or_one,   '?',        8, 9]
-  include_examples 'scan', 'aا?bبcت+', 0       => [:literal,     :literal,       'aا',       0, 3]
-  include_examples 'scan', 'aا?bبcت+', 1       => [:quantifier,  :zero_or_one,   '?',        3, 4]
-  include_examples 'scan', 'aا?bبcت+', 2       => [:literal,     :literal,       'bبcت',     4, 10]
-  include_examples 'scan', 'aا?bبcت+', 3       => [:quantifier,  :one_or_more,   '+',        10, 11]
-  include_examples 'scan', 'a(اbب+)cت?', 0     => [:literal,     :literal,       'a',        0, 1]
-  include_examples 'scan', 'a(اbب+)cت?', 1     => [:group,       :capture,       '(',        1, 2]
-  include_examples 'scan', 'a(اbب+)cت?', 2     => [:literal,     :literal,       'اbب',      2, 7]
-  include_examples 'scan', 'a(اbب+)cت?', 3     => [:quantifier,  :one_or_more,   '+',        7, 8]
-  include_examples 'scan', 'a(اbب+)cت?', 4     => [:group,       :close,         ')',        8, 9]
-  include_examples 'scan', 'a(اbب+)cت?', 5     => [:literal,     :literal,       'cت',       9, 12]
-  include_examples 'scan', 'a(اbب+)cت?', 6     => [:quantifier,  :zero_or_one,   '?',        12, 13]
+  # 2 byte wide characters
+  include_examples 'scan', 'äöü',
+    0 => [:literal,     :literal,        'äöü',     0, 3]
   # 3 byte wide characters, Japanese
-  include_examples 'scan', 'ab?れます+cd', 0    => [:literal,     :literal,       'ab',       0, 2]
-  include_examples 'scan', 'ab?れます+cd', 1    => [:quantifier,  :zero_or_one,   '?',        2, 3]
-  include_examples 'scan', 'ab?れます+cd', 2    => [:literal,     :literal,       'れます',    3, 12]
-  include_examples 'scan', 'ab?れます+cd', 3    => [:quantifier,  :one_or_more,   '+',        12, 13]
-  include_examples 'scan', 'ab?れます+cd', 4    => [:literal,     :literal,       'cd',       13, 15]
+  include_examples 'scan', 'ab?れます+cd',
+    0 => [:literal,     :literal,       'ab',       0, 2],
+    1 => [:quantifier,  :zero_or_one,   '?',        2, 3],
+    2 => [:literal,     :literal,       'れます',    3, 6],
+    3 => [:quantifier,  :one_or_more,   '+',        6, 7],
+    4 => [:literal,     :literal,       'cd',       7, 9]
   # 4 byte wide characters, Osmanya
-  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0      => [:literal,     :literal,       '𐒀𐒁',       0, 8]
-  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1      => [:quantifier,  :zero_or_one,   '?',        8, 9]
-  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2      => [:literal,     :literal,       '𐒂ab',      9, 15]
-  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3      => [:quantifier,  :one_or_more,   '+',        15, 16]
-  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4      => [:literal,     :literal,       '𐒃',        16, 20]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 0      => [:literal,     :literal,       'mu𝄞',       0, 6]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 1      => [:quantifier,  :zero_or_one,   '?',        6, 7]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 2      => [:literal,     :literal,       'si',       7, 9]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 3      => [:quantifier,  :zero_or_more,  '*',        9, 10]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 4      => [:literal,     :literal,       '𝄫c',       10, 15]
-  include_examples 'scan', 'mu𝄞?si*𝄫c+', 5      => [:quantifier,  :one_or_more,   '+',        15, 16]
+  include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
+    0 => [:literal,     :literal,       '𐒀𐒁',       0, 2],
+    1 => [:quantifier,  :zero_or_one,   '?',        2, 3],
+    2 => [:literal,     :literal,       '𐒂ab',      3, 6],
+    3 => [:quantifier,  :one_or_more,   '+',        6, 7],
+    4 => [:literal,     :literal,       '𐒃',        7, 8]
+  include_examples 'scan', 'mu𝄞?si*𝄫c+',
+    0 => [:literal,     :literal,       'mu𝄞',       0, 3],
+    1 => [:quantifier,  :zero_or_one,   '?',        3, 4],
+    2 => [:literal,     :literal,       'si',       4, 6],
+    3 => [:quantifier,  :zero_or_more,  '*',        6, 7],
+    4 => [:literal,     :literal,       '𝄫c',       7, 9],
+    5 => [:quantifier,  :one_or_more,   '+',        9, 10]
 end