regexp_parser 2.1.1 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +15 -21
- data/Rakefile +5 -11
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +0 -2
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +0 -1
- data/lib/regexp_parser/expression/subexpression.rb +0 -1
- data/lib/regexp_parser/expression.rb +6 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/syntax/any.rb +1 -3
- data/lib/regexp_parser/syntax/base.rb +9 -9
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +28 -19
- data/spec/scanner/sets_spec.rb +9 -9
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +26 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
|
|
1
|
+
# Define the base module and the simplest of tokens.
|
2
|
+
module Regexp::Syntax
|
3
|
+
module Token
|
4
|
+
Map = {}
|
5
|
+
|
6
|
+
module Literal
|
7
|
+
All = %i[literal]
|
8
|
+
Type = :literal
|
9
|
+
end
|
10
|
+
|
11
|
+
module FreeSpace
|
12
|
+
All = %i[comment whitespace]
|
13
|
+
Type = :free_space
|
14
|
+
end
|
15
|
+
|
16
|
+
Map[FreeSpace::Type] = FreeSpace::All
|
17
|
+
Map[Literal::Type] = Literal::All
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
# Load all the token files; they will populate the Map constant.
|
23
|
+
require 'regexp_parser/syntax/token/anchor'
|
24
|
+
require 'regexp_parser/syntax/token/assertion'
|
25
|
+
require 'regexp_parser/syntax/token/backreference'
|
26
|
+
require 'regexp_parser/syntax/token/posix_class'
|
27
|
+
require 'regexp_parser/syntax/token/character_set'
|
28
|
+
require 'regexp_parser/syntax/token/character_type'
|
29
|
+
require 'regexp_parser/syntax/token/conditional'
|
30
|
+
require 'regexp_parser/syntax/token/escape'
|
31
|
+
require 'regexp_parser/syntax/token/group'
|
32
|
+
require 'regexp_parser/syntax/token/keep'
|
33
|
+
require 'regexp_parser/syntax/token/meta'
|
34
|
+
require 'regexp_parser/syntax/token/quantifier'
|
35
|
+
require 'regexp_parser/syntax/token/unicode_property'
|
36
|
+
|
37
|
+
|
38
|
+
# After loading all the tokens the map is full. Extract all tokens and types
|
39
|
+
# into the All and Types constants.
|
40
|
+
module Regexp::Syntax
|
41
|
+
module Token
|
42
|
+
All = Map.values.flatten.uniq.sort.freeze
|
43
|
+
Types = Map.keys.freeze
|
44
|
+
end
|
45
|
+
end
|
@@ -5,7 +5,7 @@ module Regexp::Syntax
|
|
5
5
|
|
6
6
|
implements :anchor, Anchor::All
|
7
7
|
implements :assertion, Assertion::Lookahead
|
8
|
-
implements :backref,
|
8
|
+
implements :backref, Backreference::Plain
|
9
9
|
implements :posixclass, PosixClass::Standard
|
10
10
|
implements :group, Group::All
|
11
11
|
implements :meta, Meta::Extended
|
data/lib/regexp_parser/syntax.rb
CHANGED
data/lib/regexp_parser/token.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
class Regexp
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
:conditional_level
|
2
|
+
TOKEN_KEYS = %i[
|
3
|
+
type
|
4
|
+
token
|
5
|
+
text
|
6
|
+
ts
|
7
|
+
te
|
8
|
+
level
|
9
|
+
set_level
|
10
|
+
conditional_level
|
12
11
|
].freeze
|
13
12
|
|
14
13
|
Token = Struct.new(*TOKEN_KEYS) do
|
@@ -21,15 +20,5 @@ class Regexp
|
|
21
20
|
def length
|
22
21
|
te - ts
|
23
22
|
end
|
24
|
-
|
25
|
-
if RUBY_VERSION < '2.0.0'
|
26
|
-
def to_h
|
27
|
-
members.inject({}) do |hash, member|
|
28
|
-
hash[member.to_sym] = self[member]
|
29
|
-
hash
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
23
|
end
|
34
|
-
|
35
24
|
end
|
data/lib/regexp_parser.rb
CHANGED
data/spec/lexer/nesting_spec.rb
CHANGED
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
|
|
59
59
|
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
60
60
|
5 => [:set, :close, ']', 5, 6, 0, 0, 0]
|
61
61
|
|
62
|
-
include_examples 'lex',
|
62
|
+
include_examples 'lex', '[[:word:]&&[^c]z]',
|
63
63
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
64
64
|
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
65
65
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
|
|
70
70
|
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
71
71
|
8 => [:set, :close, ']', 16, 17, 0, 0, 0]
|
72
72
|
|
73
|
-
include_examples 'lex',
|
73
|
+
include_examples 'lex', '[\p{word}&&[^c]z]',
|
74
74
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
75
75
|
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
76
76
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
|
|
56
56
|
expect { root[5].codepoint }.to raise_error(/#codepoints/)
|
57
57
|
end
|
58
58
|
|
59
|
+
# Meta/control escapes
|
60
|
+
#
|
61
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
62
|
+
# escapes can only be set with the Regexp::new constructor.
|
63
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
64
|
+
#
|
65
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
66
|
+
def parse_meta_control(regexp_body)
|
67
|
+
regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
|
68
|
+
RP.parse(regexp)
|
69
|
+
end
|
70
|
+
|
59
71
|
specify('parse escape control sequence lower') do
|
60
|
-
root =
|
72
|
+
root = parse_meta_control('a\\\\\c2b')
|
61
73
|
|
62
74
|
expect(root[2]).to be_instance_of(EscapeSequence::Control)
|
63
75
|
expect(root[2].text).to eq '\\c2'
|
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
|
|
66
78
|
end
|
67
79
|
|
68
80
|
specify('parse escape control sequence upper') do
|
69
|
-
root =
|
81
|
+
root = parse_meta_control('\d\C-C\w')
|
70
82
|
|
71
|
-
expect(root[
|
72
|
-
expect(root[
|
73
|
-
expect(root[
|
74
|
-
expect(root[
|
83
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Control)
|
84
|
+
expect(root[1].text).to eq '\\C-C'
|
85
|
+
expect(root[1].char).to eq "\x03"
|
86
|
+
expect(root[1].codepoint).to eq 3
|
75
87
|
end
|
76
88
|
|
77
89
|
specify('parse escape meta sequence') do
|
78
|
-
root =
|
90
|
+
root = parse_meta_control('\Z\M-Z')
|
79
91
|
|
80
|
-
expect(root[
|
81
|
-
expect(root[
|
82
|
-
expect(root[
|
83
|
-
expect(root[
|
92
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Meta)
|
93
|
+
expect(root[1].text).to eq '\\M-Z'
|
94
|
+
expect(root[1].char).to eq "\u00DA"
|
95
|
+
expect(root[1].codepoint).to eq 218
|
84
96
|
end
|
85
97
|
|
86
98
|
specify('parse escape meta control sequence') do
|
87
|
-
root =
|
99
|
+
root = parse_meta_control('\A\M-\C-X')
|
88
100
|
|
89
|
-
expect(root[
|
90
|
-
expect(root[
|
91
|
-
expect(root[
|
92
|
-
expect(root[
|
101
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
102
|
+
expect(root[1].text).to eq '\\M-\\C-X'
|
103
|
+
expect(root[1].char).to eq "\u0098"
|
104
|
+
expect(root[1].codepoint).to eq 152
|
93
105
|
end
|
94
106
|
|
95
107
|
specify('parse lower c meta control sequence') do
|
96
|
-
root =
|
108
|
+
root = parse_meta_control('\A\M-\cX')
|
97
109
|
|
98
|
-
expect(root[
|
99
|
-
expect(root[
|
100
|
-
expect(root[
|
101
|
-
expect(root[
|
110
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
111
|
+
expect(root[1].text).to eq '\\M-\\cX'
|
112
|
+
expect(root[1].char).to eq "\u0098"
|
113
|
+
expect(root[1].codepoint).to eq 152
|
102
114
|
end
|
103
115
|
|
104
116
|
specify('parse escape reverse meta control sequence') do
|
105
|
-
root =
|
117
|
+
root = parse_meta_control('\A\C-\M-X')
|
106
118
|
|
107
|
-
expect(root[
|
108
|
-
expect(root[
|
109
|
-
expect(root[
|
110
|
-
expect(root[
|
119
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
120
|
+
expect(root[1].text).to eq '\\C-\\M-X'
|
121
|
+
expect(root[1].char).to eq "\u0098"
|
122
|
+
expect(root[1].codepoint).to eq 152
|
111
123
|
end
|
112
124
|
|
113
125
|
specify('parse escape reverse lower c meta control sequence') do
|
114
|
-
root =
|
126
|
+
root = parse_meta_control('\A\c\M-X')
|
115
127
|
|
116
|
-
expect(root[
|
117
|
-
expect(root[
|
118
|
-
expect(root[
|
119
|
-
expect(root[
|
128
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
129
|
+
expect(root[1].text).to eq '\\c\\M-X'
|
130
|
+
expect(root[1].char).to eq "\u0098"
|
131
|
+
expect(root[1].codepoint).to eq 152
|
120
132
|
end
|
121
133
|
end
|
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
if ruby_version_at_least('2.7.0')
|
41
|
+
specify('parse all properties of current ruby') do
|
42
|
+
unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
|
43
|
+
RP.parse("\\p{#{prop}}") rescue false
|
44
|
+
end
|
45
|
+
expect(unsupported).to be_empty
|
43
46
|
end
|
44
|
-
expect(unsupported).to be_empty
|
45
47
|
end
|
46
48
|
|
47
49
|
specify('parse property negative') do
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('CharacterSet::Range parsing') do
|
4
|
+
# Some edge-case patterns are evaluated with #match to make sure that
|
5
|
+
# their behavior still reflects the way they are parsed.
|
6
|
+
# #capturing_stderr is used to skip any warnings generated by this.
|
7
|
+
|
4
8
|
specify('parse set range') do
|
5
9
|
root = RP.parse('[a-z]')
|
6
10
|
set = root[0]
|
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
13
17
|
expect(range.first).to be_instance_of(Literal)
|
14
18
|
expect(range.last.to_s).to eq 'z'
|
15
19
|
expect(range.last).to be_instance_of(Literal)
|
16
|
-
expect(set).to match 'm'
|
20
|
+
capturing_stderr { expect(set).to match 'm' }
|
17
21
|
end
|
18
22
|
|
19
23
|
specify('parse set range hex') do
|
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
28
32
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
33
|
expect(range.last.to_s).to eq '\\x22'
|
30
34
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match "\x11"
|
35
|
+
capturing_stderr { expect(set).to match "\x11" }
|
32
36
|
end
|
33
37
|
|
34
38
|
specify('parse set range unicode') do
|
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
43
47
|
expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
|
44
48
|
expect(range.last.to_s).to eq '\\u1234'
|
45
49
|
expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
|
46
|
-
expect(set).to match '\\u600'
|
50
|
+
capturing_stderr { expect(set).to match '\\u600' }
|
47
51
|
end
|
48
52
|
|
49
53
|
specify('parse set range edge case leading dash') do
|
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
53
57
|
|
54
58
|
expect(set.count).to eq 1
|
55
59
|
expect(range.count).to eq 2
|
56
|
-
expect(set).to match 'a'
|
60
|
+
capturing_stderr { expect(set).to match 'a' }
|
57
61
|
end
|
58
62
|
|
59
63
|
specify('parse set range edge case trailing dash') do
|
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
63
67
|
|
64
68
|
expect(set.count).to eq 1
|
65
69
|
expect(range.count).to eq 2
|
66
|
-
expect(set).to match '$'
|
70
|
+
capturing_stderr { expect(set).to match '$' }
|
67
71
|
end
|
68
72
|
|
69
73
|
specify('parse set range edge case leading negate') do
|
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
71
75
|
set = root[0]
|
72
76
|
|
73
77
|
expect(set.count).to eq 2
|
74
|
-
|
75
|
-
|
78
|
+
capturing_stderr do
|
79
|
+
expect(set).to match 'a'
|
80
|
+
expect(set).not_to match 'z'
|
81
|
+
end
|
76
82
|
end
|
77
83
|
|
78
84
|
specify('parse set range edge case trailing negate') do
|
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
82
88
|
|
83
89
|
expect(set.count).to eq 1
|
84
90
|
expect(range.count).to eq 2
|
85
|
-
expect(set).to match '$'
|
91
|
+
capturing_stderr { expect(set).to match '$' }
|
86
92
|
end
|
87
93
|
|
88
94
|
specify('parse set range edge case leading intersection') do
|
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
91
97
|
|
92
98
|
expect(set.count).to eq 1
|
93
99
|
expect(set.first.last.to_s).to eq '-bc'
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
100
|
+
capturing_stderr do
|
101
|
+
expect(set).to match '-'
|
102
|
+
expect(set).to match 'b'
|
103
|
+
expect(set).not_to match 'a'
|
104
|
+
expect(set).not_to match 'c'
|
105
|
+
end
|
98
106
|
end
|
99
107
|
|
100
108
|
specify('parse set range edge case trailing intersection') do
|
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
103
111
|
|
104
112
|
expect(set.count).to eq 1
|
105
113
|
expect(set.first.first.to_s).to eq 'bc-'
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
114
|
+
capturing_stderr do
|
115
|
+
expect(set).to match '-'
|
116
|
+
expect(set).to match 'b'
|
117
|
+
expect(set).not_to match 'a'
|
118
|
+
expect(set).not_to match 'c'
|
119
|
+
end
|
110
120
|
end
|
111
121
|
end
|
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
|
|
35
35
|
include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
|
36
36
|
include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
|
37
37
|
|
38
|
-
include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
|
39
|
-
include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
|
40
|
-
include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
|
41
|
-
include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
|
42
|
-
include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
|
43
|
-
include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
|
44
|
-
include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
|
45
|
-
include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
46
|
-
include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
|
47
|
-
include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
48
|
-
|
49
|
-
include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
50
|
-
include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
51
|
-
include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
52
|
-
include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
53
|
-
include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
54
|
-
include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
55
|
-
include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
56
|
-
|
57
38
|
include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
58
39
|
include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
59
40
|
include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
|
|
61
42
|
include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
|
62
43
|
include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
|
63
44
|
include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
|
45
|
+
|
46
|
+
# Meta/control escapes
|
47
|
+
#
|
48
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
49
|
+
# escapes can only be set with the Regexp::new constructor.
|
50
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
51
|
+
#
|
52
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
53
|
+
n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
|
54
|
+
|
55
|
+
include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
|
56
|
+
include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
|
57
|
+
include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
|
58
|
+
include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
|
59
|
+
include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
|
60
|
+
include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
|
61
|
+
include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
|
62
|
+
include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
63
|
+
include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
|
64
|
+
include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
65
|
+
|
66
|
+
include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
67
|
+
include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
68
|
+
include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
69
|
+
include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
70
|
+
include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
71
|
+
include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
72
|
+
include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
64
73
|
end
|
data/spec/scanner/sets_spec.rb
CHANGED
@@ -96,21 +96,21 @@ RSpec.describe('Set scanning') do
|
|
96
96
|
include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
|
97
97
|
include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
|
98
98
|
|
99
|
-
include_examples 'scan',
|
100
|
-
include_examples 'scan',
|
101
|
-
include_examples 'scan',
|
102
|
-
include_examples 'scan',
|
99
|
+
include_examples 'scan', '[a\p{digit}c]', 2 => [:property, :digit, '\p{digit}', 2, 11]
|
100
|
+
include_examples 'scan', '[a\P{digit}c]', 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
|
101
|
+
include_examples 'scan', '[a\p{^digit}c]', 2 => [:nonproperty, :digit, '\p{^digit}', 2, 12]
|
102
|
+
include_examples 'scan', '[a\P{^digit}c]', 2 => [:property, :digit, '\P{^digit}', 2, 12]
|
103
103
|
|
104
|
-
include_examples 'scan',
|
105
|
-
include_examples 'scan',
|
106
|
-
include_examples 'scan',
|
104
|
+
include_examples 'scan', '[a\p{ALPHA}c]', 2 => [:property, :alpha, '\p{ALPHA}', 2, 11]
|
105
|
+
include_examples 'scan', '[a\p{P}c]', 2 => [:property, :punctuation,'\p{P}', 2, 7]
|
106
|
+
include_examples 'scan', '[a\p{P}\P{P}c]', 3 => [:nonproperty, :punctuation,'\P{P}', 7, 12]
|
107
107
|
|
108
|
-
include_examples 'scan',
|
108
|
+
include_examples 'scan', '[\x20-\x27]',
|
109
109
|
1 => [:escape, :hex, '\x20', 1, 5],
|
110
110
|
2 => [:set, :range, '-', 5, 6],
|
111
111
|
3 => [:escape, :hex, '\x27', 6, 10]
|
112
112
|
|
113
|
-
include_examples 'scan',
|
113
|
+
include_examples 'scan', '[a-w&&[^c-g]z]',
|
114
114
|
5 => [:set, :open, '[', 6, 7],
|
115
115
|
6 => [:set, :negate, '^', 7, 8],
|
116
116
|
8 => [:set, :range, '-', 9, 10],
|
data/spec/spec_helper.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
|
+
$VERBOSE = true
|
2
|
+
|
1
3
|
require 'ice_nine'
|
2
|
-
require 'regexp_parser'
|
3
4
|
require 'regexp_property_values'
|
5
|
+
require_relative 'support/capturing_stderr'
|
4
6
|
require_relative 'support/shared_examples'
|
5
7
|
|
8
|
+
req_warn = capturing_stderr { require('regexp_parser') || fail('pre-required') }
|
9
|
+
req_warn.empty? || fail("requiring parser generated warnings:\n#{req_warn}")
|
10
|
+
|
6
11
|
RS = Regexp::Scanner
|
7
12
|
RL = Regexp::Lexer
|
8
13
|
RP = Regexp::Parser
|
@@ -14,3 +19,10 @@ include Regexp::Expression
|
|
14
19
|
def ruby_version_at_least(version)
|
15
20
|
Gem::Version.new(RUBY_VERSION.dup) >= Gem::Version.new(version)
|
16
21
|
end
|
22
|
+
|
23
|
+
RSpec.configure do |config|
|
24
|
+
config.around(:example) do |example|
|
25
|
+
# treat unexpected warnings as failures
|
26
|
+
expect { example.run }.not_to output.to_stderr
|
27
|
+
end
|
28
|
+
end
|
@@ -4,14 +4,14 @@ RSpec.describe(Regexp::Syntax::V1_8_6) do
|
|
4
4
|
include_examples 'syntax', Regexp::Syntax.new('ruby/1.8.6'),
|
5
5
|
implements: {
|
6
6
|
assertion: T::Assertion::Lookahead,
|
7
|
-
backref:
|
7
|
+
backref: T::Backreference::Plain,
|
8
8
|
escape: T::Escape::Basic + T::Escape::ASCII + T::Escape::Meta + T::Escape::Control,
|
9
9
|
group: T::Group::V1_8_6,
|
10
10
|
quantifier: T::Quantifier::Greedy + T::Quantifier::Reluctant + T::Quantifier::Interval + T::Quantifier::IntervalReluctant
|
11
11
|
},
|
12
12
|
excludes: {
|
13
13
|
assertion: T::Assertion::Lookbehind,
|
14
|
-
backref: T::Backreference::All -
|
14
|
+
backref: T::Backreference::All - T::Backreference::Plain + T::SubexpressionCall::All,
|
15
15
|
quantifier: T::Quantifier::Possessive
|
16
16
|
}
|
17
17
|
end
|
@@ -28,6 +28,7 @@ RSpec.describe(Regexp::Syntax) do
|
|
28
28
|
include_examples 'syntax alias', 'ruby/2.6.2', Regexp::Syntax::V2_6_2
|
29
29
|
include_examples 'syntax alias', 'ruby/2.6.3', Regexp::Syntax::V2_6_3
|
30
30
|
include_examples 'syntax alias', 'ruby/2.6', Regexp::Syntax::V2_6_3
|
31
|
+
include_examples 'syntax alias', 'ruby/3.1', Regexp::Syntax::V3_1_0
|
31
32
|
|
32
33
|
specify('future alias warning') do
|
33
34
|
expect { Regexp::Syntax.new('ruby/5.0') }
|