regexp_parser 2.0.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
1
+ # Define the base module and the simplest of tokens.
2
+ module Regexp::Syntax
3
+ module Token
4
+ Map = {}
5
+
6
+ module Literal
7
+ All = %i[literal]
8
+ Type = :literal
9
+ end
10
+
11
+ module FreeSpace
12
+ All = %i[comment whitespace]
13
+ Type = :free_space
14
+ end
15
+
16
+ Map[FreeSpace::Type] = FreeSpace::All
17
+ Map[Literal::Type] = Literal::All
18
+ end
19
+ end
20
+
21
+
22
+ # Load all the token files; they will populate the Map constant.
23
+ require 'regexp_parser/syntax/token/anchor'
24
+ require 'regexp_parser/syntax/token/assertion'
25
+ require 'regexp_parser/syntax/token/backreference'
26
+ require 'regexp_parser/syntax/token/posix_class'
27
+ require 'regexp_parser/syntax/token/character_set'
28
+ require 'regexp_parser/syntax/token/character_type'
29
+ require 'regexp_parser/syntax/token/conditional'
30
+ require 'regexp_parser/syntax/token/escape'
31
+ require 'regexp_parser/syntax/token/group'
32
+ require 'regexp_parser/syntax/token/keep'
33
+ require 'regexp_parser/syntax/token/meta'
34
+ require 'regexp_parser/syntax/token/quantifier'
35
+ require 'regexp_parser/syntax/token/unicode_property'
36
+
37
+
38
+ # After loading all the tokens the map is full. Extract all tokens and types
39
+ # into the All and Types constants.
40
+ module Regexp::Syntax
41
+ module Token
42
+ All = Map.values.flatten.uniq.sort.freeze
43
+ Types = Map.keys.freeze
44
+ end
45
+ end
@@ -3,13 +3,13 @@ module Regexp::Syntax
3
3
  VERSION_REGEXP = /#{VERSION_FORMAT}/
4
4
  VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
5
5
 
6
- class InvalidVersionNameError < SyntaxError
6
+ class InvalidVersionNameError < Regexp::Syntax::SyntaxError
7
7
  def initialize(name)
8
8
  super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
9
9
  end
10
10
  end
11
11
 
12
- class UnknownSyntaxNameError < SyntaxError
12
+ class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
13
13
  def initialize(name)
14
14
  super "Unknown syntax name '#{name}'."
15
15
  end
@@ -5,7 +5,7 @@ module Regexp::Syntax
5
5
 
6
6
  implements :anchor, Anchor::All
7
7
  implements :assertion, Assertion::Lookahead
8
- implements :backref, [:number]
8
+ implements :backref, Backreference::Plain
9
9
  implements :posixclass, PosixClass::Standard
10
10
  implements :group, Group::All
11
11
  implements :meta, Meta::Extended
@@ -0,0 +1,10 @@
1
+ module Regexp::Syntax
2
+ class V3_1_0 < Regexp::Syntax::V2_6_3
3
+ def initialize
4
+ super
5
+
6
+ implements :property, UnicodeProperty::V3_1_0
7
+ implements :nonproperty, UnicodeProperty::V3_1_0
8
+ end
9
+ end
10
+ end
@@ -1,9 +1,11 @@
1
- require File.expand_path('../syntax/tokens', __FILE__)
2
- require File.expand_path('../syntax/base', __FILE__)
3
- require File.expand_path('../syntax/any', __FILE__)
4
- require File.expand_path('../syntax/version_lookup', __FILE__)
5
- require File.expand_path('../syntax/versions', __FILE__)
1
+ require 'regexp_parser/error'
6
2
 
7
3
  module Regexp::Syntax
8
- class SyntaxError < StandardError; end
4
+ class SyntaxError < Regexp::Parser::Error; end
9
5
  end
6
+
7
+ require_relative 'syntax/token'
8
+ require_relative 'syntax/base'
9
+ require_relative 'syntax/any'
10
+ require_relative 'syntax/version_lookup'
11
+ require_relative 'syntax/versions'
@@ -1,14 +1,13 @@
1
1
  class Regexp
2
-
3
- TOKEN_KEYS = [
4
- :type,
5
- :token,
6
- :text,
7
- :ts,
8
- :te,
9
- :level,
10
- :set_level,
11
- :conditional_level
2
+ TOKEN_KEYS = %i[
3
+ type
4
+ token
5
+ text
6
+ ts
7
+ te
8
+ level
9
+ set_level
10
+ conditional_level
12
11
  ].freeze
13
12
 
14
13
  Token = Struct.new(*TOKEN_KEYS) do
@@ -21,15 +20,5 @@ class Regexp
21
20
  def length
22
21
  te - ts
23
22
  end
24
-
25
- if RUBY_VERSION < '2.0.0'
26
- def to_h
27
- members.inject({}) do |hash, member|
28
- hash[member.to_sym] = self[member]
29
- hash
30
- end
31
- end
32
- end
33
23
  end
34
-
35
24
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.0.2'
3
+ VERSION = '2.2.0'
4
4
  end
5
5
  end
data/lib/regexp_parser.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: utf-8
2
-
3
1
  require 'regexp_parser/version'
4
2
  require 'regexp_parser/token'
5
3
  require 'regexp_parser/scanner'
@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
27
27
  expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
28
28
 
29
29
  # regression test
30
- expect { root_2.clone }.not_to change { root_2.quantifier.object_id }
31
- expect { root_2.clone }.not_to change { root_2.quantifier.text.object_id }
30
+ expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
31
+ expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
32
32
  end
33
33
 
34
34
  specify('Subexpression#clone') do
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
48
48
  end
49
49
 
50
50
  # regression test
51
- expect { root.clone }.not_to change { root.expressions.object_id }
51
+ expect { root.clone }.not_to(change { root.expressions.object_id })
52
52
  end
53
53
 
54
54
  specify('Group::Named#clone') do
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
69
69
  end
70
70
 
71
71
  # regression test
72
- expect { root_1.clone }.not_to change { root_1.name.object_id }
72
+ expect { root_1.clone }.not_to(change { root_1.name.object_id })
73
+ end
74
+
75
+ specify('Group::Options#clone') do
76
+ root = RP.parse('foo(?i)bar')
77
+ copy = root.clone
78
+
79
+ expect(copy.to_s).to eq root.to_s
80
+
81
+ root_1 = root[1]
82
+ copy_1 = copy[1]
83
+
84
+ expect(root_1.option_changes).to eq copy_1.option_changes
85
+ expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
86
+
87
+ # regression test
88
+ expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
89
+ end
90
+
91
+ specify('Backreference::Base#clone') do
92
+ root = RP.parse('(foo)\1')
93
+ copy = root.clone
94
+
95
+ expect(copy.to_s).to eq root.to_s
96
+
97
+ root_1 = root[1]
98
+ copy_1 = copy[1]
99
+
100
+ expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
101
+ expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
102
+
103
+ # regression test
104
+ expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
73
105
  end
74
106
 
75
107
  specify('Sequence#clone') do
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
10
10
  space = root[0]
11
11
 
12
12
  expect(space).to be_instance_of(FreeSpace::WhiteSpace)
13
- expect { space.quantify(:dummy, '#') }.to raise_error(RuntimeError)
13
+ expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
14
14
  end
15
15
 
16
16
  specify('comment quantify raises error') do
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
22
22
  comment = root[3]
23
23
 
24
24
  expect(comment).to be_instance_of(FreeSpace::Comment)
25
- expect { comment.quantify(:dummy, '#') }.to raise_error(RuntimeError)
25
+ expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
26
26
  end
27
27
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe(Regexp::MatchLength) do
4
- ML = described_class
3
+ ML = Regexp::MatchLength
5
4
 
5
+ RSpec.describe(Regexp::MatchLength) do
6
6
  specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
7
7
  specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
8
8
  specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
59
59
  4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
60
  5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
61
 
62
- include_examples 'lex', /[[:word:]&&[^c]z]/,
62
+ include_examples 'lex', '[[:word:]&&[^c]z]',
63
63
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
64
  1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
65
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
70
70
  7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
71
  8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
72
 
73
- include_examples 'lex', /[\p{word}&&[^c]z]/,
73
+ include_examples 'lex', '[\p{word}&&[^c]z]',
74
74
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
75
  1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
76
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
32
32
  include_examples 'lex', "(abc)\\g'1'",
33
33
  3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
34
 
35
+ include_examples 'lex', '\g<0>',
36
+ 0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
37
+ include_examples 'lex', "\\g'0'",
38
+ 0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
39
+
35
40
  include_examples 'lex', '(abc)\g<-1>',
36
41
  3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
42
  include_examples 'lex', "(abc)\\g'-1'",
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
34
34
  end
35
35
 
36
36
  specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(ArgumentError)
37
+ expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
38
38
  end
39
39
 
40
40
  specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
41
+ expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
42
42
  end
43
43
  end
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
56
56
  expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
57
  end
58
58
 
59
+ # Meta/control escapes
60
+ #
61
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
62
+ # escapes can only be set with the Regexp::new constructor.
63
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
64
+ #
65
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
66
+ def parse_meta_control(regexp_body)
67
+ regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
68
+ RP.parse(regexp)
69
+ end
70
+
59
71
  specify('parse escape control sequence lower') do
60
- root = RP.parse(/a\\\c2b/)
72
+ root = parse_meta_control('a\\\\\c2b')
61
73
 
62
74
  expect(root[2]).to be_instance_of(EscapeSequence::Control)
63
75
  expect(root[2].text).to eq '\\c2'
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
66
78
  end
67
79
 
68
80
  specify('parse escape control sequence upper') do
69
- root = RP.parse(/\d\\\C-C\w/)
81
+ root = parse_meta_control('\d\C-C\w')
70
82
 
71
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
72
- expect(root[2].text).to eq '\\C-C'
73
- expect(root[2].char).to eq "\x03"
74
- expect(root[2].codepoint).to eq 3
83
+ expect(root[1]).to be_instance_of(EscapeSequence::Control)
84
+ expect(root[1].text).to eq '\\C-C'
85
+ expect(root[1].char).to eq "\x03"
86
+ expect(root[1].codepoint).to eq 3
75
87
  end
76
88
 
77
89
  specify('parse escape meta sequence') do
78
- root = RP.parse(/\Z\\\M-Z/n)
90
+ root = parse_meta_control('\Z\M-Z')
79
91
 
80
- expect(root[2]).to be_instance_of(EscapeSequence::Meta)
81
- expect(root[2].text).to eq '\\M-Z'
82
- expect(root[2].char).to eq "\u00DA"
83
- expect(root[2].codepoint).to eq 218
92
+ expect(root[1]).to be_instance_of(EscapeSequence::Meta)
93
+ expect(root[1].text).to eq '\\M-Z'
94
+ expect(root[1].char).to eq "\u00DA"
95
+ expect(root[1].codepoint).to eq 218
84
96
  end
85
97
 
86
98
  specify('parse escape meta control sequence') do
87
- root = RP.parse(/\A\\\M-\C-X/n)
99
+ root = parse_meta_control('\A\M-\C-X')
88
100
 
89
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
90
- expect(root[2].text).to eq '\\M-\\C-X'
91
- expect(root[2].char).to eq "\u0098"
92
- expect(root[2].codepoint).to eq 152
101
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
102
+ expect(root[1].text).to eq '\\M-\\C-X'
103
+ expect(root[1].char).to eq "\u0098"
104
+ expect(root[1].codepoint).to eq 152
93
105
  end
94
106
 
95
107
  specify('parse lower c meta control sequence') do
96
- root = RP.parse(/\A\\\M-\cX/n)
108
+ root = parse_meta_control('\A\M-\cX')
97
109
 
98
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
99
- expect(root[2].text).to eq '\\M-\\cX'
100
- expect(root[2].char).to eq "\u0098"
101
- expect(root[2].codepoint).to eq 152
110
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
111
+ expect(root[1].text).to eq '\\M-\\cX'
112
+ expect(root[1].char).to eq "\u0098"
113
+ expect(root[1].codepoint).to eq 152
102
114
  end
103
115
 
104
116
  specify('parse escape reverse meta control sequence') do
105
- root = RP.parse(/\A\\\C-\M-X/n)
117
+ root = parse_meta_control('\A\C-\M-X')
106
118
 
107
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
108
- expect(root[2].text).to eq '\\C-\\M-X'
109
- expect(root[2].char).to eq "\u0098"
110
- expect(root[2].codepoint).to eq 152
119
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
120
+ expect(root[1].text).to eq '\\C-\\M-X'
121
+ expect(root[1].char).to eq "\u0098"
122
+ expect(root[1].codepoint).to eq 152
111
123
  end
112
124
 
113
125
  specify('parse escape reverse lower c meta control sequence') do
114
- root = RP.parse(/\A\\\c\M-X/n)
126
+ root = parse_meta_control('\A\c\M-X')
115
127
 
116
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
117
- expect(root[2].text).to eq '\\c\\M-X'
118
- expect(root[2].char).to eq "\u0098"
119
- expect(root[2].codepoint).to eq 152
128
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
129
+ expect(root[1].text).to eq '\\c\\M-X'
130
+ expect(root[1].char).to eq "\u0098"
131
+ expect(root[1].codepoint).to eq 152
120
132
  end
121
133
  end
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
37
37
  end
38
38
  end
39
39
 
40
- specify('parse all properties of current ruby') do
41
- unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
42
- RP.parse("\\p{#{prop}}") rescue false
40
+ if ruby_version_at_least('2.7.0')
41
+ specify('parse all properties of current ruby') do
42
+ unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
43
+ RP.parse("\\p{#{prop}}") rescue false
44
+ end
45
+ expect(unsupported).to be_empty
43
46
  end
44
- expect(unsupported).to be_empty
45
47
  end
46
48
 
47
49
  specify('parse property negative') do
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
29
29
  include_examples 'parse', /(abc)\g'1'/,
30
30
  1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
31
31
 
32
+ include_examples 'parse', '\g<0>',
33
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
34
+ include_examples 'parse', "\\g'0'",
35
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
36
+
32
37
  include_examples 'parse', /(abc)\g<-1>/,
33
38
  1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
34
39
  include_examples 'parse', /(abc)\g'-1'/,
@@ -1,6 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('CharacterSet::Range parsing') do
4
+ # Some edge-case patterns are evaluated with #match to make sure that
5
+ # their behavior still reflects the way they are parsed.
6
+ # #capturing_stderr is used to skip any warnings generated by this.
7
+
4
8
  specify('parse set range') do
5
9
  root = RP.parse('[a-z]')
6
10
  set = root[0]
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
13
17
  expect(range.first).to be_instance_of(Literal)
14
18
  expect(range.last.to_s).to eq 'z'
15
19
  expect(range.last).to be_instance_of(Literal)
16
- expect(set).to match 'm'
20
+ capturing_stderr { expect(set).to match 'm' }
17
21
  end
18
22
 
19
23
  specify('parse set range hex') do
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
28
32
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
33
  expect(range.last.to_s).to eq '\\x22'
30
34
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match "\x11"
35
+ capturing_stderr { expect(set).to match "\x11" }
32
36
  end
33
37
 
34
38
  specify('parse set range unicode') do
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
43
47
  expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
44
48
  expect(range.last.to_s).to eq '\\u1234'
45
49
  expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
46
- expect(set).to match '\\u600'
50
+ capturing_stderr { expect(set).to match '\\u600' }
47
51
  end
48
52
 
49
53
  specify('parse set range edge case leading dash') do
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
53
57
 
54
58
  expect(set.count).to eq 1
55
59
  expect(range.count).to eq 2
56
- expect(set).to match 'a'
60
+ capturing_stderr { expect(set).to match 'a' }
57
61
  end
58
62
 
59
63
  specify('parse set range edge case trailing dash') do
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
63
67
 
64
68
  expect(set.count).to eq 1
65
69
  expect(range.count).to eq 2
66
- expect(set).to match '$'
70
+ capturing_stderr { expect(set).to match '$' }
67
71
  end
68
72
 
69
73
  specify('parse set range edge case leading negate') do
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
71
75
  set = root[0]
72
76
 
73
77
  expect(set.count).to eq 2
74
- expect(set).to match 'a'
75
- expect(set).not_to match 'z'
78
+ capturing_stderr do
79
+ expect(set).to match 'a'
80
+ expect(set).not_to match 'z'
81
+ end
76
82
  end
77
83
 
78
84
  specify('parse set range edge case trailing negate') do
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
82
88
 
83
89
  expect(set.count).to eq 1
84
90
  expect(range.count).to eq 2
85
- expect(set).to match '$'
91
+ capturing_stderr { expect(set).to match '$' }
86
92
  end
87
93
 
88
94
  specify('parse set range edge case leading intersection') do
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
91
97
 
92
98
  expect(set.count).to eq 1
93
99
  expect(set.first.last.to_s).to eq '-bc'
94
- expect(set).to match '-'
95
- expect(set).to match 'b'
96
- expect(set).not_to match 'a'
97
- expect(set).not_to match 'c'
100
+ capturing_stderr do
101
+ expect(set).to match '-'
102
+ expect(set).to match 'b'
103
+ expect(set).not_to match 'a'
104
+ expect(set).not_to match 'c'
105
+ end
98
106
  end
99
107
 
100
108
  specify('parse set range edge case trailing intersection') do
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
103
111
 
104
112
  expect(set.count).to eq 1
105
113
  expect(set.first.first.to_s).to eq 'bc-'
106
- expect(set).to match '-'
107
- expect(set).to match 'b'
108
- expect(set).not_to match 'a'
109
- expect(set).not_to match 'c'
114
+ capturing_stderr do
115
+ expect(set).to match '-'
116
+ expect(set).to match 'b'
117
+ expect(set).not_to match 'a'
118
+ expect(set).not_to match 'c'
119
+ end
110
120
  end
111
121
  end
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
4
4
  include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
5
 
6
6
  # not an escape outside a character set
7
- include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
8
 
9
9
  include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
10
  include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
35
35
  include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
36
36
  include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
37
37
 
38
- include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
39
- include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
40
- include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
41
- include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
42
- include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
43
- include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
44
- include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
45
- include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
46
- include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
47
- include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
48
-
49
- include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
50
- include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
51
- include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
52
- include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
53
- include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
54
- include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
55
- include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
56
-
57
38
  include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
58
39
  include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
59
40
  include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
61
42
  include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
62
43
  include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
63
44
  include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
45
+
46
+ # Meta/control escapes
47
+ #
48
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
49
+ # escapes can only be set with the Regexp::new constructor.
50
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
51
+ #
52
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
53
+ n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
54
+
55
+ include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
56
+ include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
57
+ include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
58
+ include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
59
+ include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
60
+ include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
61
+ include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
62
+ include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
63
+ include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
64
+ include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
65
+
66
+ include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
67
+ include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
68
+ include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
69
+ include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
70
+ include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
71
+ include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
72
+ include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
64
73
  end