regexp_parser 2.1.1 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +15 -21
  4. data/Rakefile +5 -11
  5. data/lib/regexp_parser/expression/base.rb +123 -0
  6. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  7. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  8. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  11. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  12. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  13. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  14. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  16. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  17. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  18. data/lib/regexp_parser/expression/sequence.rb +0 -1
  19. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  20. data/lib/regexp_parser/expression.rb +6 -130
  21. data/lib/regexp_parser/lexer.rb +7 -5
  22. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  23. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  24. data/lib/regexp_parser/syntax/any.rb +1 -3
  25. data/lib/regexp_parser/syntax/base.rb +9 -9
  26. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  27. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  28. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  29. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  30. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  31. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  32. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  34. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  35. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  36. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  38. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  39. data/lib/regexp_parser/syntax/token.rb +45 -0
  40. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  41. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  42. data/lib/regexp_parser/syntax.rb +1 -1
  43. data/lib/regexp_parser/token.rb +9 -20
  44. data/lib/regexp_parser/version.rb +1 -1
  45. data/lib/regexp_parser.rb +0 -2
  46. data/spec/lexer/nesting_spec.rb +2 -2
  47. data/spec/parser/escapes_spec.rb +43 -31
  48. data/spec/parser/properties_spec.rb +6 -4
  49. data/spec/parser/set/ranges_spec.rb +26 -16
  50. data/spec/scanner/escapes_spec.rb +28 -19
  51. data/spec/scanner/sets_spec.rb +9 -9
  52. data/spec/spec_helper.rb +13 -1
  53. data/spec/support/capturing_stderr.rb +9 -0
  54. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  55. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  56. data/spec/syntax/versions/aliases_spec.rb +1 -0
  57. metadata +26 -26
  58. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  59. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  60. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  61. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  62. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  63. data/spec/support/runner.rb +0 -42
  64. data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
1
+ # Define the base module and the simplest of tokens.
2
+ module Regexp::Syntax
3
+ module Token
4
+ Map = {}
5
+
6
+ module Literal
7
+ All = %i[literal]
8
+ Type = :literal
9
+ end
10
+
11
+ module FreeSpace
12
+ All = %i[comment whitespace]
13
+ Type = :free_space
14
+ end
15
+
16
+ Map[FreeSpace::Type] = FreeSpace::All
17
+ Map[Literal::Type] = Literal::All
18
+ end
19
+ end
20
+
21
+
22
+ # Load all the token files; they will populate the Map constant.
23
+ require 'regexp_parser/syntax/token/anchor'
24
+ require 'regexp_parser/syntax/token/assertion'
25
+ require 'regexp_parser/syntax/token/backreference'
26
+ require 'regexp_parser/syntax/token/posix_class'
27
+ require 'regexp_parser/syntax/token/character_set'
28
+ require 'regexp_parser/syntax/token/character_type'
29
+ require 'regexp_parser/syntax/token/conditional'
30
+ require 'regexp_parser/syntax/token/escape'
31
+ require 'regexp_parser/syntax/token/group'
32
+ require 'regexp_parser/syntax/token/keep'
33
+ require 'regexp_parser/syntax/token/meta'
34
+ require 'regexp_parser/syntax/token/quantifier'
35
+ require 'regexp_parser/syntax/token/unicode_property'
36
+
37
+
38
+ # After loading all the tokens the map is full. Extract all tokens and types
39
+ # into the All and Types constants.
40
+ module Regexp::Syntax
41
+ module Token
42
+ All = Map.values.flatten.uniq.sort.freeze
43
+ Types = Map.keys.freeze
44
+ end
45
+ end
@@ -5,7 +5,7 @@ module Regexp::Syntax
5
5
 
6
6
  implements :anchor, Anchor::All
7
7
  implements :assertion, Assertion::Lookahead
8
- implements :backref, [:number]
8
+ implements :backref, Backreference::Plain
9
9
  implements :posixclass, PosixClass::Standard
10
10
  implements :group, Group::All
11
11
  implements :meta, Meta::Extended
@@ -0,0 +1,10 @@
1
+ module Regexp::Syntax
2
+ class V3_1_0 < Regexp::Syntax::V2_6_3
3
+ def initialize
4
+ super
5
+
6
+ implements :property, UnicodeProperty::V3_1_0
7
+ implements :nonproperty, UnicodeProperty::V3_1_0
8
+ end
9
+ end
10
+ end
@@ -4,7 +4,7 @@ module Regexp::Syntax
4
4
  class SyntaxError < Regexp::Parser::Error; end
5
5
  end
6
6
 
7
- require_relative 'syntax/tokens'
7
+ require_relative 'syntax/token'
8
8
  require_relative 'syntax/base'
9
9
  require_relative 'syntax/any'
10
10
  require_relative 'syntax/version_lookup'
@@ -1,14 +1,13 @@
1
1
  class Regexp
2
-
3
- TOKEN_KEYS = [
4
- :type,
5
- :token,
6
- :text,
7
- :ts,
8
- :te,
9
- :level,
10
- :set_level,
11
- :conditional_level
2
+ TOKEN_KEYS = %i[
3
+ type
4
+ token
5
+ text
6
+ ts
7
+ te
8
+ level
9
+ set_level
10
+ conditional_level
12
11
  ].freeze
13
12
 
14
13
  Token = Struct.new(*TOKEN_KEYS) do
@@ -21,15 +20,5 @@ class Regexp
21
20
  def length
22
21
  te - ts
23
22
  end
24
-
25
- if RUBY_VERSION < '2.0.0'
26
- def to_h
27
- members.inject({}) do |hash, member|
28
- hash[member.to_sym] = self[member]
29
- hash
30
- end
31
- end
32
- end
33
23
  end
34
-
35
24
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.1.1'
3
+ VERSION = '2.2.0'
4
4
  end
5
5
  end
data/lib/regexp_parser.rb CHANGED
@@ -1,5 +1,3 @@
1
- # encoding: utf-8
2
-
3
1
  require 'regexp_parser/version'
4
2
  require 'regexp_parser/token'
5
3
  require 'regexp_parser/scanner'
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
59
59
  4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
60
60
  5 => [:set, :close, ']', 5, 6, 0, 0, 0]
61
61
 
62
- include_examples 'lex', /[[:word:]&&[^c]z]/,
62
+ include_examples 'lex', '[[:word:]&&[^c]z]',
63
63
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
64
64
  1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
65
65
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
70
70
  7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
71
71
  8 => [:set, :close, ']', 16, 17, 0, 0, 0]
72
72
 
73
- include_examples 'lex', /[\p{word}&&[^c]z]/,
73
+ include_examples 'lex', '[\p{word}&&[^c]z]',
74
74
  0 => [:set, :open, '[', 0, 1, 0, 0, 0],
75
75
  1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
76
76
  2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
56
56
  expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
57
  end
58
58
 
59
+ # Meta/control escapes
60
+ #
61
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
62
+ # escapes can only be set with the Regexp::new constructor.
63
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
64
+ #
65
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
66
+ def parse_meta_control(regexp_body)
67
+ regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
68
+ RP.parse(regexp)
69
+ end
70
+
59
71
  specify('parse escape control sequence lower') do
60
- root = RP.parse(/a\\\c2b/)
72
+ root = parse_meta_control('a\\\\\c2b')
61
73
 
62
74
  expect(root[2]).to be_instance_of(EscapeSequence::Control)
63
75
  expect(root[2].text).to eq '\\c2'
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
66
78
  end
67
79
 
68
80
  specify('parse escape control sequence upper') do
69
- root = RP.parse(/\d\\\C-C\w/)
81
+ root = parse_meta_control('\d\C-C\w')
70
82
 
71
- expect(root[2]).to be_instance_of(EscapeSequence::Control)
72
- expect(root[2].text).to eq '\\C-C'
73
- expect(root[2].char).to eq "\x03"
74
- expect(root[2].codepoint).to eq 3
83
+ expect(root[1]).to be_instance_of(EscapeSequence::Control)
84
+ expect(root[1].text).to eq '\\C-C'
85
+ expect(root[1].char).to eq "\x03"
86
+ expect(root[1].codepoint).to eq 3
75
87
  end
76
88
 
77
89
  specify('parse escape meta sequence') do
78
- root = RP.parse(/\Z\\\M-Z/n)
90
+ root = parse_meta_control('\Z\M-Z')
79
91
 
80
- expect(root[2]).to be_instance_of(EscapeSequence::Meta)
81
- expect(root[2].text).to eq '\\M-Z'
82
- expect(root[2].char).to eq "\u00DA"
83
- expect(root[2].codepoint).to eq 218
92
+ expect(root[1]).to be_instance_of(EscapeSequence::Meta)
93
+ expect(root[1].text).to eq '\\M-Z'
94
+ expect(root[1].char).to eq "\u00DA"
95
+ expect(root[1].codepoint).to eq 218
84
96
  end
85
97
 
86
98
  specify('parse escape meta control sequence') do
87
- root = RP.parse(/\A\\\M-\C-X/n)
99
+ root = parse_meta_control('\A\M-\C-X')
88
100
 
89
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
90
- expect(root[2].text).to eq '\\M-\\C-X'
91
- expect(root[2].char).to eq "\u0098"
92
- expect(root[2].codepoint).to eq 152
101
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
102
+ expect(root[1].text).to eq '\\M-\\C-X'
103
+ expect(root[1].char).to eq "\u0098"
104
+ expect(root[1].codepoint).to eq 152
93
105
  end
94
106
 
95
107
  specify('parse lower c meta control sequence') do
96
- root = RP.parse(/\A\\\M-\cX/n)
108
+ root = parse_meta_control('\A\M-\cX')
97
109
 
98
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
99
- expect(root[2].text).to eq '\\M-\\cX'
100
- expect(root[2].char).to eq "\u0098"
101
- expect(root[2].codepoint).to eq 152
110
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
111
+ expect(root[1].text).to eq '\\M-\\cX'
112
+ expect(root[1].char).to eq "\u0098"
113
+ expect(root[1].codepoint).to eq 152
102
114
  end
103
115
 
104
116
  specify('parse escape reverse meta control sequence') do
105
- root = RP.parse(/\A\\\C-\M-X/n)
117
+ root = parse_meta_control('\A\C-\M-X')
106
118
 
107
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
108
- expect(root[2].text).to eq '\\C-\\M-X'
109
- expect(root[2].char).to eq "\u0098"
110
- expect(root[2].codepoint).to eq 152
119
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
120
+ expect(root[1].text).to eq '\\C-\\M-X'
121
+ expect(root[1].char).to eq "\u0098"
122
+ expect(root[1].codepoint).to eq 152
111
123
  end
112
124
 
113
125
  specify('parse escape reverse lower c meta control sequence') do
114
- root = RP.parse(/\A\\\c\M-X/n)
126
+ root = parse_meta_control('\A\c\M-X')
115
127
 
116
- expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
117
- expect(root[2].text).to eq '\\c\\M-X'
118
- expect(root[2].char).to eq "\u0098"
119
- expect(root[2].codepoint).to eq 152
128
+ expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
129
+ expect(root[1].text).to eq '\\c\\M-X'
130
+ expect(root[1].char).to eq "\u0098"
131
+ expect(root[1].codepoint).to eq 152
120
132
  end
121
133
  end
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
37
37
  end
38
38
  end
39
39
 
40
- specify('parse all properties of current ruby') do
41
- unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
42
- RP.parse("\\p{#{prop}}") rescue false
40
+ if ruby_version_at_least('2.7.0')
41
+ specify('parse all properties of current ruby') do
42
+ unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
43
+ RP.parse("\\p{#{prop}}") rescue false
44
+ end
45
+ expect(unsupported).to be_empty
43
46
  end
44
- expect(unsupported).to be_empty
45
47
  end
46
48
 
47
49
  specify('parse property negative') do
@@ -1,6 +1,10 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('CharacterSet::Range parsing') do
4
+ # Some edge-case patterns are evaluated with #match to make sure that
5
+ # their behavior still reflects the way they are parsed.
6
+ # #capturing_stderr is used to skip any warnings generated by this.
7
+
4
8
  specify('parse set range') do
5
9
  root = RP.parse('[a-z]')
6
10
  set = root[0]
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
13
17
  expect(range.first).to be_instance_of(Literal)
14
18
  expect(range.last.to_s).to eq 'z'
15
19
  expect(range.last).to be_instance_of(Literal)
16
- expect(set).to match 'm'
20
+ capturing_stderr { expect(set).to match 'm' }
17
21
  end
18
22
 
19
23
  specify('parse set range hex') do
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
28
32
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
33
  expect(range.last.to_s).to eq '\\x22'
30
34
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match "\x11"
35
+ capturing_stderr { expect(set).to match "\x11" }
32
36
  end
33
37
 
34
38
  specify('parse set range unicode') do
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
43
47
  expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
44
48
  expect(range.last.to_s).to eq '\\u1234'
45
49
  expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
46
- expect(set).to match '\\u600'
50
+ capturing_stderr { expect(set).to match '\\u600' }
47
51
  end
48
52
 
49
53
  specify('parse set range edge case leading dash') do
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
53
57
 
54
58
  expect(set.count).to eq 1
55
59
  expect(range.count).to eq 2
56
- expect(set).to match 'a'
60
+ capturing_stderr { expect(set).to match 'a' }
57
61
  end
58
62
 
59
63
  specify('parse set range edge case trailing dash') do
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
63
67
 
64
68
  expect(set.count).to eq 1
65
69
  expect(range.count).to eq 2
66
- expect(set).to match '$'
70
+ capturing_stderr { expect(set).to match '$' }
67
71
  end
68
72
 
69
73
  specify('parse set range edge case leading negate') do
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
71
75
  set = root[0]
72
76
 
73
77
  expect(set.count).to eq 2
74
- expect(set).to match 'a'
75
- expect(set).not_to match 'z'
78
+ capturing_stderr do
79
+ expect(set).to match 'a'
80
+ expect(set).not_to match 'z'
81
+ end
76
82
  end
77
83
 
78
84
  specify('parse set range edge case trailing negate') do
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
82
88
 
83
89
  expect(set.count).to eq 1
84
90
  expect(range.count).to eq 2
85
- expect(set).to match '$'
91
+ capturing_stderr { expect(set).to match '$' }
86
92
  end
87
93
 
88
94
  specify('parse set range edge case leading intersection') do
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
91
97
 
92
98
  expect(set.count).to eq 1
93
99
  expect(set.first.last.to_s).to eq '-bc'
94
- expect(set).to match '-'
95
- expect(set).to match 'b'
96
- expect(set).not_to match 'a'
97
- expect(set).not_to match 'c'
100
+ capturing_stderr do
101
+ expect(set).to match '-'
102
+ expect(set).to match 'b'
103
+ expect(set).not_to match 'a'
104
+ expect(set).not_to match 'c'
105
+ end
98
106
  end
99
107
 
100
108
  specify('parse set range edge case trailing intersection') do
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
103
111
 
104
112
  expect(set.count).to eq 1
105
113
  expect(set.first.first.to_s).to eq 'bc-'
106
- expect(set).to match '-'
107
- expect(set).to match 'b'
108
- expect(set).not_to match 'a'
109
- expect(set).not_to match 'c'
114
+ capturing_stderr do
115
+ expect(set).to match '-'
116
+ expect(set).to match 'b'
117
+ expect(set).not_to match 'a'
118
+ expect(set).not_to match 'c'
119
+ end
110
120
  end
111
121
  end
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
35
35
  include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
36
36
  include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
37
37
 
38
- include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
39
- include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
40
- include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
41
- include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
42
- include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
43
- include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
44
- include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
45
- include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
46
- include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
47
- include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
48
-
49
- include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
50
- include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
51
- include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
52
- include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
53
- include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
54
- include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
55
- include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
56
-
57
38
  include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
58
39
  include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
59
40
  include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
61
42
  include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
62
43
  include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
63
44
  include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
45
+
46
+ # Meta/control escapes
47
+ #
48
+ # After the following fix in Ruby 3.1, a Regexp#source containing meta/control
49
+ # escapes can only be set with the Regexp::new constructor.
50
+ # In Regexp literals, these escapes are now pre-processed to hex escapes.
51
+ #
52
+ # https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
53
+ n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
54
+
55
+ include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
56
+ include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
57
+ include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
58
+ include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
59
+ include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
60
+ include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
61
+ include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
62
+ include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
63
+ include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
64
+ include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
65
+
66
+ include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
67
+ include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
68
+ include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
69
+ include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
70
+ include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
71
+ include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
72
+ include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
64
73
  end
@@ -96,21 +96,21 @@ RSpec.describe('Set scanning') do
96
96
  include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
97
97
  include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
98
98
 
99
- include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
100
- include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
101
- include_examples 'scan', /[a\p{^digit}c]/, 2 => [:nonproperty, :digit, '\p{^digit}', 2, 12]
102
- include_examples 'scan', /[a\P{^digit}c]/, 2 => [:property, :digit, '\P{^digit}', 2, 12]
99
+ include_examples 'scan', '[a\p{digit}c]', 2 => [:property, :digit, '\p{digit}', 2, 11]
100
+ include_examples 'scan', '[a\P{digit}c]', 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
101
+ include_examples 'scan', '[a\p{^digit}c]', 2 => [:nonproperty, :digit, '\p{^digit}', 2, 12]
102
+ include_examples 'scan', '[a\P{^digit}c]', 2 => [:property, :digit, '\P{^digit}', 2, 12]
103
103
 
104
- include_examples 'scan', /[a\p{ALPHA}c]/, 2 => [:property, :alpha, '\p{ALPHA}', 2, 11]
105
- include_examples 'scan', /[a\p{P}c]/, 2 => [:property, :punctuation,'\p{P}', 2, 7]
106
- include_examples 'scan', /[a\p{P}\P{P}c]/, 3 => [:nonproperty, :punctuation,'\P{P}', 7, 12]
104
+ include_examples 'scan', '[a\p{ALPHA}c]', 2 => [:property, :alpha, '\p{ALPHA}', 2, 11]
105
+ include_examples 'scan', '[a\p{P}c]', 2 => [:property, :punctuation,'\p{P}', 2, 7]
106
+ include_examples 'scan', '[a\p{P}\P{P}c]', 3 => [:nonproperty, :punctuation,'\P{P}', 7, 12]
107
107
 
108
- include_examples 'scan', /[\x20-\x27]/,
108
+ include_examples 'scan', '[\x20-\x27]',
109
109
  1 => [:escape, :hex, '\x20', 1, 5],
110
110
  2 => [:set, :range, '-', 5, 6],
111
111
  3 => [:escape, :hex, '\x27', 6, 10]
112
112
 
113
- include_examples 'scan', /[a-w&&[^c-g]z]/,
113
+ include_examples 'scan', '[a-w&&[^c-g]z]',
114
114
  5 => [:set, :open, '[', 6, 7],
115
115
  6 => [:set, :negate, '^', 7, 8],
116
116
  8 => [:set, :range, '-', 9, 10],
data/spec/spec_helper.rb CHANGED
@@ -1,8 +1,13 @@
1
+ $VERBOSE = true
2
+
1
3
  require 'ice_nine'
2
- require 'regexp_parser'
3
4
  require 'regexp_property_values'
5
+ require_relative 'support/capturing_stderr'
4
6
  require_relative 'support/shared_examples'
5
7
 
8
+ req_warn = capturing_stderr { require('regexp_parser') || fail('pre-required') }
9
+ req_warn.empty? || fail("requiring parser generated warnings:\n#{req_warn}")
10
+
6
11
  RS = Regexp::Scanner
7
12
  RL = Regexp::Lexer
8
13
  RP = Regexp::Parser
@@ -14,3 +19,10 @@ include Regexp::Expression
14
19
  def ruby_version_at_least(version)
15
20
  Gem::Version.new(RUBY_VERSION.dup) >= Gem::Version.new(version)
16
21
  end
22
+
23
+ RSpec.configure do |config|
24
+ config.around(:example) do |example|
25
+ # treat unexpected warnings as failures
26
+ expect { example.run }.not_to output.to_stderr
27
+ end
28
+ end
@@ -0,0 +1,9 @@
1
+ require 'stringio'
2
+
3
+ def capturing_stderr(&block)
4
+ old_stderr, $stderr = $stderr, StringIO.new
5
+ block.call
6
+ $stderr.string
7
+ ensure
8
+ $stderr = old_stderr
9
+ end
@@ -4,14 +4,14 @@ RSpec.describe(Regexp::Syntax::V1_8_6) do
4
4
  include_examples 'syntax', Regexp::Syntax.new('ruby/1.8.6'),
5
5
  implements: {
6
6
  assertion: T::Assertion::Lookahead,
7
- backref: [:number],
7
+ backref: T::Backreference::Plain,
8
8
  escape: T::Escape::Basic + T::Escape::ASCII + T::Escape::Meta + T::Escape::Control,
9
9
  group: T::Group::V1_8_6,
10
10
  quantifier: T::Quantifier::Greedy + T::Quantifier::Reluctant + T::Quantifier::Interval + T::Quantifier::IntervalReluctant
11
11
  },
12
12
  excludes: {
13
13
  assertion: T::Assertion::Lookbehind,
14
- backref: T::Backreference::All - [:number] + T::SubexpressionCall::All,
14
+ backref: T::Backreference::All - T::Backreference::Plain + T::SubexpressionCall::All,
15
15
  quantifier: T::Quantifier::Possessive
16
16
  }
17
17
  end
@@ -7,7 +7,7 @@ RSpec.describe(Regexp::Syntax::V2_0_0) do
7
7
  nonproperty: T::UnicodeProperty::Age_V2_0_0
8
8
  },
9
9
  excludes: {
10
- property: [:newline],
11
- nonproperty: [:newline]
10
+ property: %i[newline],
11
+ nonproperty: %i[newline]
12
12
  }
13
13
  end
@@ -28,6 +28,7 @@ RSpec.describe(Regexp::Syntax) do
28
28
  include_examples 'syntax alias', 'ruby/2.6.2', Regexp::Syntax::V2_6_2
29
29
  include_examples 'syntax alias', 'ruby/2.6.3', Regexp::Syntax::V2_6_3
30
30
  include_examples 'syntax alias', 'ruby/2.6', Regexp::Syntax::V2_6_3
31
+ include_examples 'syntax alias', 'ruby/3.1', Regexp::Syntax::V3_1_0
31
32
 
32
33
  specify('future alias warning') do
33
34
  expect { Regexp::Syntax.new('ruby/5.0') }