regexp_parser 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
1
+ # Define the base module and the simplest of tokens.
2
+ module Regexp::Syntax
3
+ module Token
4
+ Map = {}
5
+
6
+ module Literal
7
+ All = %i[literal]
8
+ Type = :literal
9
+ end
10
+
11
+ module FreeSpace
12
+ All = %i[comment whitespace]
13
+ Type = :free_space
14
+ end
15
+
16
+ Map[FreeSpace::Type] = FreeSpace::All
17
+ Map[Literal::Type] = Literal::All
18
+ end
19
+ end
20
+
21
+
22
+ # Load all the token files, they will populate the Map constant.
23
+ require 'regexp_parser/syntax/token/anchor'
24
+ require 'regexp_parser/syntax/token/assertion'
25
+ require 'regexp_parser/syntax/token/backreference'
26
+ require 'regexp_parser/syntax/token/posix_class'
27
+ require 'regexp_parser/syntax/token/character_set'
28
+ require 'regexp_parser/syntax/token/character_type'
29
+ require 'regexp_parser/syntax/token/conditional'
30
+ require 'regexp_parser/syntax/token/escape'
31
+ require 'regexp_parser/syntax/token/group'
32
+ require 'regexp_parser/syntax/token/keep'
33
+ require 'regexp_parser/syntax/token/meta'
34
+ require 'regexp_parser/syntax/token/quantifier'
35
+ require 'regexp_parser/syntax/token/unicode_property'
36
+
37
+
38
+ # After loading all the tokens the map is full. Extract all tokens and types
39
+ # into the All and Types constants.
40
+ module Regexp::Syntax
41
+ module Token
42
+ All = Map.values.flatten.uniq.sort.freeze
43
+ Types = Map.keys.freeze
44
+ end
45
+ end
@@ -3,13 +3,13 @@ module Regexp::Syntax
3
3
  VERSION_REGEXP = /#{VERSION_FORMAT}/
4
4
  VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
5
5
 
6
- class InvalidVersionNameError < SyntaxError
6
+ class InvalidVersionNameError < Regexp::Syntax::SyntaxError
7
7
  def initialize(name)
8
8
  super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
9
9
  end
10
10
  end
11
11
 
12
- class UnknownSyntaxNameError < SyntaxError
12
+ class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
13
13
  def initialize(name)
14
14
  super "Unknown syntax name '#{name}'."
15
15
  end
@@ -5,7 +5,7 @@ module Regexp::Syntax
5
5
 
6
6
  implements :anchor, Anchor::All
7
7
  implements :assertion, Assertion::Lookahead
8
- implements :backref, [:number]
8
+ implements :backref, Backreference::Plain
9
9
  implements :posixclass, PosixClass::Standard
10
10
  implements :group, Group::All
11
11
  implements :meta, Meta::Extended
@@ -0,0 +1,10 @@
1
+ module Regexp::Syntax
2
+ class V3_1_0 < Regexp::Syntax::V2_6_3
3
+ def initialize
4
+ super
5
+
6
+ implements :property, UnicodeProperty::V3_1_0
7
+ implements :nonproperty, UnicodeProperty::V3_1_0
8
+ end
9
+ end
10
+ end
@@ -1,9 +1,11 @@
1
- require File.expand_path('../syntax/tokens', __FILE__)
2
- require File.expand_path('../syntax/base', __FILE__)
3
- require File.expand_path('../syntax/any', __FILE__)
4
- require File.expand_path('../syntax/version_lookup', __FILE__)
5
- require File.expand_path('../syntax/versions', __FILE__)
1
+ require 'regexp_parser/error'
6
2
 
7
3
  module Regexp::Syntax
8
- class SyntaxError < StandardError; end
4
+ class SyntaxError < Regexp::Parser::Error; end
9
5
  end
6
+
7
+ require_relative 'syntax/token'
8
+ require_relative 'syntax/base'
9
+ require_relative 'syntax/any'
10
+ require_relative 'syntax/version_lookup'
11
+ require_relative 'syntax/versions'
@@ -1,14 +1,13 @@
1
1
  class Regexp
2
-
3
- TOKEN_KEYS = [
4
- :type,
5
- :token,
6
- :text,
7
- :ts,
8
- :te,
9
- :level,
10
- :set_level,
11
- :conditional_level
2
+ TOKEN_KEYS = %i[
3
+ type
4
+ token
5
+ text
6
+ ts
7
+ te
8
+ level
9
+ set_level
10
+ conditional_level
12
11
  ].freeze
13
12
 
14
13
  Token = Struct.new(*TOKEN_KEYS) do
@@ -21,15 +20,5 @@ class Regexp
21
20
  def length
22
21
  te - ts
23
22
  end
24
-
25
- if RUBY_VERSION < '2.0.0'
26
- def to_h
27
- members.inject({}) do |hash, member|
28
- hash[member.to_sym] = self[member]
29
- hash
30
- end
31
- end
32
- end
33
23
  end
34
-
35
24
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.0.2'
3
+ VERSION = '2.2.0'
4
4
  end
5
5
  end
data/lib/regexp_parser.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: utf-8
2
-
3
1
  require 'regexp_parser/version'
4
2
  require 'regexp_parser/token'
5
3
  require 'regexp_parser/scanner'
@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
27
27
  expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
28
28
 
29
29
  # regression test
30
- expect { root_2.clone }.not_to change { root_2.quantifier.object_id }
31
- expect { root_2.clone }.not_to change { root_2.quantifier.text.object_id }
30
+ expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
31
+ expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
32
32
  end
33
33
 
34
34
  specify('Subexpression#clone') do
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
48
48
  end
49
49
 
50
50
  # regression test
51
- expect { root.clone }.not_to change { root.expressions.object_id }
51
+ expect { root.clone }.not_to(change { root.expressions.object_id })
52
52
  end
53
53
 
54
54
  specify('Group::Named#clone') do
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
69
69
  end
70
70
 
71
71
  # regression test
72
- expect { root_1.clone }.not_to change { root_1.name.object_id }
72
+ expect { root_1.clone }.not_to(change { root_1.name.object_id })
73
+ end
74
+
75
+ specify('Group::Options#clone') do
76
+ root = RP.parse('foo(?i)bar')
77
+ copy = root.clone
78
+
79
+ expect(copy.to_s).to eq root.to_s
80
+
81
+ root_1 = root[1]
82
+ copy_1 = copy[1]
83
+
84
+ expect(root_1.option_changes).to eq copy_1.option_changes
85
+ expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
86
+
87
+ # regression test
88
+ expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
89
+ end
90
+
91
+ specify('Backreference::Base#clone') do
92
+ root = RP.parse('(foo)\1')
93
+ copy = root.clone
94
+
95
+ expect(copy.to_s).to eq root.to_s
96
+
97
+ root_1 = root[1]
98
+ copy_1 = copy[1]
99
+
100
+ expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
101
+ expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
102
+
103
+ # regression test
104
+ expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
73
105
  end
74
106
 
75
107
  specify('Sequence#clone') do
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
10
10
  space = root[0]
11
11
 
12
12
  expect(space).to be_instance_of(FreeSpace::WhiteSpace)
13
- expect { space.quantify(:dummy, '#') }.to raise_error(RuntimeError)
13
+ expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
14
14
  end
15
15
 
16
16
  specify('comment quantify raises error') do
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
22
22
  comment = root[3]
23
23
 
24
24
  expect(comment).to be_instance_of(FreeSpace::Comment)
25
- expect { comment.quantify(:dummy, '#') }.to raise_error(RuntimeError)
25
+ expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
26
26
  end
27
27
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe(Regexp::MatchLength) do
4
- ML = described_class
3
+ ML = Regexp::MatchLength
5
4
 
5
+ RSpec.describe(Regexp::MatchLength) do
6
6
  specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
7
7
  specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
8
8
  specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
59
59
  4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
60
  5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
61
 
62
- include_examples 'lex', /[[:word:]&&[^c]z]/,
62
+ include_examples 'lex', '[[:word:]&&[^c]z]',
63
63
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
64
  1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
65
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
70
70
  7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
71
  8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
72
 
73
- include_examples 'lex', /[\p{word}&&[^c]z]/,
73
+ include_examples 'lex', '[\p{word}&&[^c]z]',
74
74
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
75
  1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
76
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
32
32
  include_examples 'lex', "(abc)\\g'1'",
33
33
  3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
34
 
35
+ include_examples 'lex', '\g<0>',
36
+ 0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
37
+ include_examples 'lex', "\\g'0'",
38
+ 0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
39
+
35
40
  include_examples 'lex', '(abc)\g<-1>',
36
41
  3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
42
  include_examples 'lex', "(abc)\\g'-1'",
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
34
34
  end
35
35
 
36
36
  specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(ArgumentError)
37
+ expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
38
38
  end
39
39
 
40
40
  specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
41
+ expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
42
42
  end
43
43
  end
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
56
56
  expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
57
  end
58
58
 
59
+ # Meta/control escapes
60
+ #
61
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
62
+ # escapes can only be set with the Regexp::new constructor.
63
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
64
+ #
65
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
66
+ def parse_meta_control(regexp_body)
67
+ regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
68
+ RP.parse(regexp)
69
+ end
70
+
59
71
  specify('parse escape control sequence lower') do
60
- root = RP.parse(/a\\\c2b/)
72
+ root = parse_meta_control('a\\\\\c2b')
61
73
 
62
74
  expect(root[2]).to be_instance_of(EscapeSequence::Control)
63
75
  expect(root[2].text).to eq '\\c2'
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
66
78
  end
67
79
 
68
80
  specify('parse escape control sequence upper') do
69
- root = RP.parse(/\d\\\C-C\w/)
81
+ root = parse_meta_control('\d\C-C\w')
70
82
 
71
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
72
- expect(root[2].text).to eq '\\C-C'
73
- expect(root[2].char).to eq "\x03"
74
- expect(root[2].codepoint).to eq 3
83
+ expect(root[1]).to be_instance_of(EscapeSequence::Control)
84
+ expect(root[1].text).to eq '\\C-C'
85
+ expect(root[1].char).to eq "\x03"
86
+ expect(root[1].codepoint).to eq 3
75
87
  end
76
88
 
77
89
  specify('parse escape meta sequence') do
78
- root = RP.parse(/\Z\\\M-Z/n)
90
+ root = parse_meta_control('\Z\M-Z')
79
91
 
80
- expect(root[2]).to be_instance_of(EscapeSequence::Meta)
81
- expect(root[2].text).to eq '\\M-Z'
82
- expect(root[2].char).to eq "\u00DA"
83
- expect(root[2].codepoint).to eq 218
92
+ expect(root[1]).to be_instance_of(EscapeSequence::Meta)
93
+ expect(root[1].text).to eq '\\M-Z'
94
+ expect(root[1].char).to eq "\u00DA"
95
+ expect(root[1].codepoint).to eq 218
84
96
  end
85
97
 
86
98
  specify('parse escape meta control sequence') do
87
- root = RP.parse(/\A\\\M-\C-X/n)
99
+ root = parse_meta_control('\A\M-\C-X')
88
100
 
89
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
90
- expect(root[2].text).to eq '\\M-\\C-X'
91
- expect(root[2].char).to eq "\u0098"
92
- expect(root[2].codepoint).to eq 152
101
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
102
+ expect(root[1].text).to eq '\\M-\\C-X'
103
+ expect(root[1].char).to eq "\u0098"
104
+ expect(root[1].codepoint).to eq 152
93
105
  end
94
106
 
95
107
  specify('parse lower c meta control sequence') do
96
- root = RP.parse(/\A\\\M-\cX/n)
108
+ root = parse_meta_control('\A\M-\cX')
97
109
 
98
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
99
- expect(root[2].text).to eq '\\M-\\cX'
100
- expect(root[2].char).to eq "\u0098"
101
- expect(root[2].codepoint).to eq 152
110
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
111
+ expect(root[1].text).to eq '\\M-\\cX'
112
+ expect(root[1].char).to eq "\u0098"
113
+ expect(root[1].codepoint).to eq 152
102
114
  end
103
115
 
104
116
  specify('parse escape reverse meta control sequence') do
105
- root = RP.parse(/\A\\\C-\M-X/n)
117
+ root = parse_meta_control('\A\C-\M-X')
106
118
 
107
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
108
- expect(root[2].text).to eq '\\C-\\M-X'
109
- expect(root[2].char).to eq "\u0098"
110
- expect(root[2].codepoint).to eq 152
119
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
120
+ expect(root[1].text).to eq '\\C-\\M-X'
121
+ expect(root[1].char).to eq "\u0098"
122
+ expect(root[1].codepoint).to eq 152
111
123
  end
112
124
 
113
125
  specify('parse escape reverse lower c meta control sequence') do
114
- root = RP.parse(/\A\\\c\M-X/n)
126
+ root = parse_meta_control('\A\c\M-X')
115
127
 
116
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
117
- expect(root[2].text).to eq '\\c\\M-X'
118
- expect(root[2].char).to eq "\u0098"
119
- expect(root[2].codepoint).to eq 152
128
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
129
+ expect(root[1].text).to eq '\\c\\M-X'
130
+ expect(root[1].char).to eq "\u0098"
131
+ expect(root[1].codepoint).to eq 152
120
132
  end
121
133
  end
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
37
37
  end
38
38
  end
39
39
 
40
- specify('parse all properties of current ruby') do
41
- unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
42
- RP.parse("\\p{#{prop}}") rescue false
40
+ if ruby_version_at_least('2.7.0')
41
+ specify('parse all properties of current ruby') do
42
+ unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
43
+ RP.parse("\\p{#{prop}}") rescue false
44
+ end
45
+ expect(unsupported).to be_empty
43
46
  end
44
- expect(unsupported).to be_empty
45
47
  end
46
48
 
47
49
  specify('parse property negative') do
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
29
29
  include_examples 'parse', /(abc)\g'1'/,
30
30
  1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
31
31
 
32
+ include_examples 'parse', '\g<0>',
33
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
34
+ include_examples 'parse', "\\g'0'",
35
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
36
+
32
37
  include_examples 'parse', /(abc)\g<-1>/,
33
38
  1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
34
39
  include_examples 'parse', /(abc)\g'-1'/,
@@ -1,6 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('CharacterSet::Range parsing') do
4
+ # Some edge-case patterns are evaluated with #match to make sure that
5
+ # their behavior still reflects the way they are parsed.
6
+ # #capturing_stderr is used to skip any warnings generated by this.
7
+
4
8
  specify('parse set range') do
5
9
  root = RP.parse('[a-z]')
6
10
  set = root[0]
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
13
17
  expect(range.first).to be_instance_of(Literal)
14
18
  expect(range.last.to_s).to eq 'z'
15
19
  expect(range.last).to be_instance_of(Literal)
16
- expect(set).to match 'm'
20
+ capturing_stderr { expect(set).to match 'm' }
17
21
  end
18
22
 
19
23
  specify('parse set range hex') do
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
28
32
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
33
  expect(range.last.to_s).to eq '\\x22'
30
34
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match "\x11"
35
+ capturing_stderr { expect(set).to match "\x11" }
32
36
  end
33
37
 
34
38
  specify('parse set range unicode') do
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
43
47
  expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
44
48
  expect(range.last.to_s).to eq '\\u1234'
45
49
  expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
46
- expect(set).to match '\\u600'
50
+ capturing_stderr { expect(set).to match '\\u600' }
47
51
  end
48
52
 
49
53
  specify('parse set range edge case leading dash') do
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
53
57
 
54
58
  expect(set.count).to eq 1
55
59
  expect(range.count).to eq 2
56
- expect(set).to match 'a'
60
+ capturing_stderr { expect(set).to match 'a' }
57
61
  end
58
62
 
59
63
  specify('parse set range edge case trailing dash') do
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
63
67
 
64
68
  expect(set.count).to eq 1
65
69
  expect(range.count).to eq 2
66
- expect(set).to match '$'
70
+ capturing_stderr { expect(set).to match '$' }
67
71
  end
68
72
 
69
73
  specify('parse set range edge case leading negate') do
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
71
75
  set = root[0]
72
76
 
73
77
  expect(set.count).to eq 2
74
- expect(set).to match 'a'
75
- expect(set).not_to match 'z'
78
+ capturing_stderr do
79
+ expect(set).to match 'a'
80
+ expect(set).not_to match 'z'
81
+ end
76
82
  end
77
83
 
78
84
  specify('parse set range edge case trailing negate') do
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
82
88
 
83
89
  expect(set.count).to eq 1
84
90
  expect(range.count).to eq 2
85
- expect(set).to match '$'
91
+ capturing_stderr { expect(set).to match '$' }
86
92
  end
87
93
 
88
94
  specify('parse set range edge case leading intersection') do
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
91
97
 
92
98
  expect(set.count).to eq 1
93
99
  expect(set.first.last.to_s).to eq '-bc'
94
- expect(set).to match '-'
95
- expect(set).to match 'b'
96
- expect(set).not_to match 'a'
97
- expect(set).not_to match 'c'
100
+ capturing_stderr do
101
+ expect(set).to match '-'
102
+ expect(set).to match 'b'
103
+ expect(set).not_to match 'a'
104
+ expect(set).not_to match 'c'
105
+ end
98
106
  end
99
107
 
100
108
  specify('parse set range edge case trailing intersection') do
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
103
111
 
104
112
  expect(set.count).to eq 1
105
113
  expect(set.first.first.to_s).to eq 'bc-'
106
- expect(set).to match '-'
107
- expect(set).to match 'b'
108
- expect(set).not_to match 'a'
109
- expect(set).not_to match 'c'
114
+ capturing_stderr do
115
+ expect(set).to match '-'
116
+ expect(set).to match 'b'
117
+ expect(set).not_to match 'a'
118
+ expect(set).not_to match 'c'
119
+ end
110
120
  end
111
121
  end
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
4
4
  include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
5
 
6
6
  # not an escape outside a character set
7
- include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
8
 
9
9
  include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
10
  include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
35
35
  include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
36
36
  include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
37
37
 
38
- include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
39
- include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
40
- include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
41
- include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
42
- include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
43
- include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
44
- include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
45
- include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
46
- include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
47
- include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
48
-
49
- include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
50
- include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
51
- include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
52
- include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
53
- include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
54
- include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
55
- include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
56
-
57
38
  include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
58
39
  include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
59
40
  include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
61
42
  include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
62
43
  include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
63
44
  include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
45
+
46
+ # Meta/control escapes
47
+ #
48
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
49
+ # escapes can only be set with the Regexp::new constructor.
50
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
51
+ #
52
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
53
+ n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
54
+
55
+ include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
56
+ include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
57
+ include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
58
+ include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
59
+ include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
60
+ include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
61
+ include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
62
+ include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
63
+ include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
64
+ include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
65
+
66
+ include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
67
+ include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
68
+ include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
69
+ include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
70
+ include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
71
+ include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
72
+ include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
64
73
  end