regexp_parser 2.0.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
|
|
1
|
+
# Define the base module and the simplest of tokens.
|
2
|
+
module Regexp::Syntax
|
3
|
+
module Token
|
4
|
+
Map = {}
|
5
|
+
|
6
|
+
module Literal
|
7
|
+
All = %i[literal]
|
8
|
+
Type = :literal
|
9
|
+
end
|
10
|
+
|
11
|
+
module FreeSpace
|
12
|
+
All = %i[comment whitespace]
|
13
|
+
Type = :free_space
|
14
|
+
end
|
15
|
+
|
16
|
+
Map[FreeSpace::Type] = FreeSpace::All
|
17
|
+
Map[Literal::Type] = Literal::All
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
# Load all the token files, they will populate the Map constant.
|
23
|
+
require 'regexp_parser/syntax/token/anchor'
|
24
|
+
require 'regexp_parser/syntax/token/assertion'
|
25
|
+
require 'regexp_parser/syntax/token/backreference'
|
26
|
+
require 'regexp_parser/syntax/token/posix_class'
|
27
|
+
require 'regexp_parser/syntax/token/character_set'
|
28
|
+
require 'regexp_parser/syntax/token/character_type'
|
29
|
+
require 'regexp_parser/syntax/token/conditional'
|
30
|
+
require 'regexp_parser/syntax/token/escape'
|
31
|
+
require 'regexp_parser/syntax/token/group'
|
32
|
+
require 'regexp_parser/syntax/token/keep'
|
33
|
+
require 'regexp_parser/syntax/token/meta'
|
34
|
+
require 'regexp_parser/syntax/token/quantifier'
|
35
|
+
require 'regexp_parser/syntax/token/unicode_property'
|
36
|
+
|
37
|
+
|
38
|
+
# After loading all the tokens the map is full. Extract all tokens and types
|
39
|
+
# into the All and Types constants.
|
40
|
+
module Regexp::Syntax
|
41
|
+
module Token
|
42
|
+
All = Map.values.flatten.uniq.sort.freeze
|
43
|
+
Types = Map.keys.freeze
|
44
|
+
end
|
45
|
+
end
|
@@ -3,13 +3,13 @@ module Regexp::Syntax
|
|
3
3
|
VERSION_REGEXP = /#{VERSION_FORMAT}/
|
4
4
|
VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
|
5
5
|
|
6
|
-
class InvalidVersionNameError < SyntaxError
|
6
|
+
class InvalidVersionNameError < Regexp::Syntax::SyntaxError
|
7
7
|
def initialize(name)
|
8
8
|
super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
class UnknownSyntaxNameError < SyntaxError
|
12
|
+
class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
|
13
13
|
def initialize(name)
|
14
14
|
super "Unknown syntax name '#{name}'."
|
15
15
|
end
|
@@ -5,7 +5,7 @@ module Regexp::Syntax
|
|
5
5
|
|
6
6
|
implements :anchor, Anchor::All
|
7
7
|
implements :assertion, Assertion::Lookahead
|
8
|
-
implements :backref,
|
8
|
+
implements :backref, Backreference::Plain
|
9
9
|
implements :posixclass, PosixClass::Standard
|
10
10
|
implements :group, Group::All
|
11
11
|
implements :meta, Meta::Extended
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require File.expand_path('../syntax/base', __FILE__)
|
3
|
-
require File.expand_path('../syntax/any', __FILE__)
|
4
|
-
require File.expand_path('../syntax/version_lookup', __FILE__)
|
5
|
-
require File.expand_path('../syntax/versions', __FILE__)
|
1
|
+
require 'regexp_parser/error'
|
6
2
|
|
7
3
|
module Regexp::Syntax
|
8
|
-
class SyntaxError <
|
4
|
+
class SyntaxError < Regexp::Parser::Error; end
|
9
5
|
end
|
6
|
+
|
7
|
+
require_relative 'syntax/token'
|
8
|
+
require_relative 'syntax/base'
|
9
|
+
require_relative 'syntax/any'
|
10
|
+
require_relative 'syntax/version_lookup'
|
11
|
+
require_relative 'syntax/versions'
|
data/lib/regexp_parser/token.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
class Regexp
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
:conditional_level
|
2
|
+
TOKEN_KEYS = %i[
|
3
|
+
type
|
4
|
+
token
|
5
|
+
text
|
6
|
+
ts
|
7
|
+
te
|
8
|
+
level
|
9
|
+
set_level
|
10
|
+
conditional_level
|
12
11
|
].freeze
|
13
12
|
|
14
13
|
Token = Struct.new(*TOKEN_KEYS) do
|
@@ -21,15 +20,5 @@ class Regexp
|
|
21
20
|
def length
|
22
21
|
te - ts
|
23
22
|
end
|
24
|
-
|
25
|
-
if RUBY_VERSION < '2.0.0'
|
26
|
-
def to_h
|
27
|
-
members.inject({}) do |hash, member|
|
28
|
-
hash[member.to_sym] = self[member]
|
29
|
-
hash
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
23
|
end
|
34
|
-
|
35
24
|
end
|
data/lib/regexp_parser.rb
CHANGED
@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
|
|
27
27
|
expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
|
28
28
|
|
29
29
|
# regression test
|
30
|
-
expect { root_2.clone }.not_to
|
31
|
-
expect { root_2.clone }.not_to
|
30
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
|
31
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('Subexpression#clone') do
|
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
# regression test
|
51
|
-
expect { root.clone }.not_to
|
51
|
+
expect { root.clone }.not_to(change { root.expressions.object_id })
|
52
52
|
end
|
53
53
|
|
54
54
|
specify('Group::Named#clone') do
|
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# regression test
|
72
|
-
expect { root_1.clone }.not_to
|
72
|
+
expect { root_1.clone }.not_to(change { root_1.name.object_id })
|
73
|
+
end
|
74
|
+
|
75
|
+
specify('Group::Options#clone') do
|
76
|
+
root = RP.parse('foo(?i)bar')
|
77
|
+
copy = root.clone
|
78
|
+
|
79
|
+
expect(copy.to_s).to eq root.to_s
|
80
|
+
|
81
|
+
root_1 = root[1]
|
82
|
+
copy_1 = copy[1]
|
83
|
+
|
84
|
+
expect(root_1.option_changes).to eq copy_1.option_changes
|
85
|
+
expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
|
86
|
+
|
87
|
+
# regression test
|
88
|
+
expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
|
89
|
+
end
|
90
|
+
|
91
|
+
specify('Backreference::Base#clone') do
|
92
|
+
root = RP.parse('(foo)\1')
|
93
|
+
copy = root.clone
|
94
|
+
|
95
|
+
expect(copy.to_s).to eq root.to_s
|
96
|
+
|
97
|
+
root_1 = root[1]
|
98
|
+
copy_1 = copy[1]
|
99
|
+
|
100
|
+
expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
|
101
|
+
expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
|
102
|
+
|
103
|
+
# regression test
|
104
|
+
expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
|
73
105
|
end
|
74
106
|
|
75
107
|
specify('Sequence#clone') do
|
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
10
10
|
space = root[0]
|
11
11
|
|
12
12
|
expect(space).to be_instance_of(FreeSpace::WhiteSpace)
|
13
|
-
expect { space.quantify(:dummy, '#') }.to raise_error(
|
13
|
+
expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
14
14
|
end
|
15
15
|
|
16
16
|
specify('comment quantify raises error') do
|
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
22
22
|
comment = root[3]
|
23
23
|
|
24
24
|
expect(comment).to be_instance_of(FreeSpace::Comment)
|
25
|
-
expect { comment.quantify(:dummy, '#') }.to raise_error(
|
25
|
+
expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
26
26
|
end
|
27
27
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
|
4
|
-
ML = described_class
|
3
|
+
ML = Regexp::MatchLength
|
5
4
|
|
5
|
+
RSpec.describe(Regexp::MatchLength) do
|
6
6
|
specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
|
7
7
|
specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
|
8
8
|
specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
|
data/spec/lexer/nesting_spec.rb
CHANGED
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
|
|
59
59
|
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
60
60
|
5 => [:set, :close, ']', 5, 6, 0, 0, 0]
|
61
61
|
|
62
|
-
include_examples 'lex',
|
62
|
+
include_examples 'lex', '[[:word:]&&[^c]z]',
|
63
63
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
64
64
|
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
65
65
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
|
|
70
70
|
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
71
71
|
8 => [:set, :close, ']', 16, 17, 0, 0, 0]
|
72
72
|
|
73
|
-
include_examples 'lex',
|
73
|
+
include_examples 'lex', '[\p{word}&&[^c]z]',
|
74
74
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
75
75
|
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
76
76
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
data/spec/lexer/refcalls_spec.rb
CHANGED
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
|
|
32
32
|
include_examples 'lex', "(abc)\\g'1'",
|
33
33
|
3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
|
34
34
|
|
35
|
+
include_examples 'lex', '\g<0>',
|
36
|
+
0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
|
37
|
+
include_examples 'lex', "\\g'0'",
|
38
|
+
0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
|
39
|
+
|
35
40
|
include_examples 'lex', '(abc)\g<-1>',
|
36
41
|
3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
|
37
42
|
include_examples 'lex', "(abc)\\g'-1'",
|
data/spec/parser/all_spec.rb
CHANGED
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
|
|
34
34
|
end
|
35
35
|
|
36
36
|
specify('parse no quantifier target raises error') do
|
37
|
-
expect { RP.parse('?abc') }.to raise_error(
|
37
|
+
expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
|
38
38
|
end
|
39
39
|
|
40
40
|
specify('parse sequence no quantifier target raises error') do
|
41
|
-
expect { RP.parse('abc|?def') }.to raise_error(
|
41
|
+
expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
|
42
42
|
end
|
43
43
|
end
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
|
|
56
56
|
expect { root[5].codepoint }.to raise_error(/#codepoints/)
|
57
57
|
end
|
58
58
|
|
59
|
+
# Meta/control escapes
|
60
|
+
#
|
61
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
62
|
+
# escapes can only be set with the Regexp::new constructor.
|
63
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
64
|
+
#
|
65
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
66
|
+
def parse_meta_control(regexp_body)
|
67
|
+
regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
|
68
|
+
RP.parse(regexp)
|
69
|
+
end
|
70
|
+
|
59
71
|
specify('parse escape control sequence lower') do
|
60
|
-
root =
|
72
|
+
root = parse_meta_control('a\\\\\c2b')
|
61
73
|
|
62
74
|
expect(root[2]).to be_instance_of(EscapeSequence::Control)
|
63
75
|
expect(root[2].text).to eq '\\c2'
|
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
|
|
66
78
|
end
|
67
79
|
|
68
80
|
specify('parse escape control sequence upper') do
|
69
|
-
root =
|
81
|
+
root = parse_meta_control('\d\C-C\w')
|
70
82
|
|
71
|
-
expect(root[
|
72
|
-
expect(root[
|
73
|
-
expect(root[
|
74
|
-
expect(root[
|
83
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Control)
|
84
|
+
expect(root[1].text).to eq '\\C-C'
|
85
|
+
expect(root[1].char).to eq "\x03"
|
86
|
+
expect(root[1].codepoint).to eq 3
|
75
87
|
end
|
76
88
|
|
77
89
|
specify('parse escape meta sequence') do
|
78
|
-
root =
|
90
|
+
root = parse_meta_control('\Z\M-Z')
|
79
91
|
|
80
|
-
expect(root[
|
81
|
-
expect(root[
|
82
|
-
expect(root[
|
83
|
-
expect(root[
|
92
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Meta)
|
93
|
+
expect(root[1].text).to eq '\\M-Z'
|
94
|
+
expect(root[1].char).to eq "\u00DA"
|
95
|
+
expect(root[1].codepoint).to eq 218
|
84
96
|
end
|
85
97
|
|
86
98
|
specify('parse escape meta control sequence') do
|
87
|
-
root =
|
99
|
+
root = parse_meta_control('\A\M-\C-X')
|
88
100
|
|
89
|
-
expect(root[
|
90
|
-
expect(root[
|
91
|
-
expect(root[
|
92
|
-
expect(root[
|
101
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
102
|
+
expect(root[1].text).to eq '\\M-\\C-X'
|
103
|
+
expect(root[1].char).to eq "\u0098"
|
104
|
+
expect(root[1].codepoint).to eq 152
|
93
105
|
end
|
94
106
|
|
95
107
|
specify('parse lower c meta control sequence') do
|
96
|
-
root =
|
108
|
+
root = parse_meta_control('\A\M-\cX')
|
97
109
|
|
98
|
-
expect(root[
|
99
|
-
expect(root[
|
100
|
-
expect(root[
|
101
|
-
expect(root[
|
110
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
111
|
+
expect(root[1].text).to eq '\\M-\\cX'
|
112
|
+
expect(root[1].char).to eq "\u0098"
|
113
|
+
expect(root[1].codepoint).to eq 152
|
102
114
|
end
|
103
115
|
|
104
116
|
specify('parse escape reverse meta control sequence') do
|
105
|
-
root =
|
117
|
+
root = parse_meta_control('\A\C-\M-X')
|
106
118
|
|
107
|
-
expect(root[
|
108
|
-
expect(root[
|
109
|
-
expect(root[
|
110
|
-
expect(root[
|
119
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
120
|
+
expect(root[1].text).to eq '\\C-\\M-X'
|
121
|
+
expect(root[1].char).to eq "\u0098"
|
122
|
+
expect(root[1].codepoint).to eq 152
|
111
123
|
end
|
112
124
|
|
113
125
|
specify('parse escape reverse lower c meta control sequence') do
|
114
|
-
root =
|
126
|
+
root = parse_meta_control('\A\c\M-X')
|
115
127
|
|
116
|
-
expect(root[
|
117
|
-
expect(root[
|
118
|
-
expect(root[
|
119
|
-
expect(root[
|
128
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
129
|
+
expect(root[1].text).to eq '\\c\\M-X'
|
130
|
+
expect(root[1].char).to eq "\u0098"
|
131
|
+
expect(root[1].codepoint).to eq 152
|
120
132
|
end
|
121
133
|
end
|
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
if ruby_version_at_least('2.7.0')
|
41
|
+
specify('parse all properties of current ruby') do
|
42
|
+
unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
|
43
|
+
RP.parse("\\p{#{prop}}") rescue false
|
44
|
+
end
|
45
|
+
expect(unsupported).to be_empty
|
43
46
|
end
|
44
|
-
expect(unsupported).to be_empty
|
45
47
|
end
|
46
48
|
|
47
49
|
specify('parse property negative') do
|
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
|
|
29
29
|
include_examples 'parse', /(abc)\g'1'/,
|
30
30
|
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
31
31
|
|
32
|
+
include_examples 'parse', '\g<0>',
|
33
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
34
|
+
include_examples 'parse', "\\g'0'",
|
35
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
36
|
+
|
32
37
|
include_examples 'parse', /(abc)\g<-1>/,
|
33
38
|
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
34
39
|
include_examples 'parse', /(abc)\g'-1'/,
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('CharacterSet::Range parsing') do
|
4
|
+
# Some edge-case patterns are evaluated with #match to make sure that
|
5
|
+
# their behavior still reflects the way they are parsed.
|
6
|
+
# #capturing_stderr is used to skip any warnings generated by this.
|
7
|
+
|
4
8
|
specify('parse set range') do
|
5
9
|
root = RP.parse('[a-z]')
|
6
10
|
set = root[0]
|
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
13
17
|
expect(range.first).to be_instance_of(Literal)
|
14
18
|
expect(range.last.to_s).to eq 'z'
|
15
19
|
expect(range.last).to be_instance_of(Literal)
|
16
|
-
expect(set).to match 'm'
|
20
|
+
capturing_stderr { expect(set).to match 'm' }
|
17
21
|
end
|
18
22
|
|
19
23
|
specify('parse set range hex') do
|
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
28
32
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
33
|
expect(range.last.to_s).to eq '\\x22'
|
30
34
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match "\x11"
|
35
|
+
capturing_stderr { expect(set).to match "\x11" }
|
32
36
|
end
|
33
37
|
|
34
38
|
specify('parse set range unicode') do
|
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
43
47
|
expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
|
44
48
|
expect(range.last.to_s).to eq '\\u1234'
|
45
49
|
expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
|
46
|
-
expect(set).to match '\\u600'
|
50
|
+
capturing_stderr { expect(set).to match '\\u600' }
|
47
51
|
end
|
48
52
|
|
49
53
|
specify('parse set range edge case leading dash') do
|
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
53
57
|
|
54
58
|
expect(set.count).to eq 1
|
55
59
|
expect(range.count).to eq 2
|
56
|
-
expect(set).to match 'a'
|
60
|
+
capturing_stderr { expect(set).to match 'a' }
|
57
61
|
end
|
58
62
|
|
59
63
|
specify('parse set range edge case trailing dash') do
|
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
63
67
|
|
64
68
|
expect(set.count).to eq 1
|
65
69
|
expect(range.count).to eq 2
|
66
|
-
expect(set).to match '$'
|
70
|
+
capturing_stderr { expect(set).to match '$' }
|
67
71
|
end
|
68
72
|
|
69
73
|
specify('parse set range edge case leading negate') do
|
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
71
75
|
set = root[0]
|
72
76
|
|
73
77
|
expect(set.count).to eq 2
|
74
|
-
|
75
|
-
|
78
|
+
capturing_stderr do
|
79
|
+
expect(set).to match 'a'
|
80
|
+
expect(set).not_to match 'z'
|
81
|
+
end
|
76
82
|
end
|
77
83
|
|
78
84
|
specify('parse set range edge case trailing negate') do
|
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
82
88
|
|
83
89
|
expect(set.count).to eq 1
|
84
90
|
expect(range.count).to eq 2
|
85
|
-
expect(set).to match '$'
|
91
|
+
capturing_stderr { expect(set).to match '$' }
|
86
92
|
end
|
87
93
|
|
88
94
|
specify('parse set range edge case leading intersection') do
|
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
91
97
|
|
92
98
|
expect(set.count).to eq 1
|
93
99
|
expect(set.first.last.to_s).to eq '-bc'
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
100
|
+
capturing_stderr do
|
101
|
+
expect(set).to match '-'
|
102
|
+
expect(set).to match 'b'
|
103
|
+
expect(set).not_to match 'a'
|
104
|
+
expect(set).not_to match 'c'
|
105
|
+
end
|
98
106
|
end
|
99
107
|
|
100
108
|
specify('parse set range edge case trailing intersection') do
|
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
103
111
|
|
104
112
|
expect(set.count).to eq 1
|
105
113
|
expect(set.first.first.to_s).to eq 'bc-'
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
114
|
+
capturing_stderr do
|
115
|
+
expect(set).to match '-'
|
116
|
+
expect(set).to match 'b'
|
117
|
+
expect(set).not_to match 'a'
|
118
|
+
expect(set).not_to match 'c'
|
119
|
+
end
|
110
120
|
end
|
111
121
|
end
|
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
|
|
4
4
|
include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
|
5
5
|
|
6
6
|
# not an escape outside a character set
|
7
|
-
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1,
|
7
|
+
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
|
8
8
|
|
9
9
|
include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
10
10
|
include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
|
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
|
|
35
35
|
include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
|
36
36
|
include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
|
37
37
|
|
38
|
-
include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
|
39
|
-
include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
|
40
|
-
include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
|
41
|
-
include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
|
42
|
-
include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
|
43
|
-
include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
|
44
|
-
include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
|
45
|
-
include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
46
|
-
include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
|
47
|
-
include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
48
|
-
|
49
|
-
include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
50
|
-
include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
51
|
-
include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
52
|
-
include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
53
|
-
include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
54
|
-
include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
55
|
-
include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
56
|
-
|
57
38
|
include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
58
39
|
include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
59
40
|
include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
|
|
61
42
|
include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
|
62
43
|
include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
|
63
44
|
include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
|
45
|
+
|
46
|
+
# Meta/control escapes
|
47
|
+
#
|
48
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
49
|
+
# escapes can only be set with the Regexp::new constructor.
|
50
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
51
|
+
#
|
52
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
53
|
+
n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
|
54
|
+
|
55
|
+
include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
|
56
|
+
include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
|
57
|
+
include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
|
58
|
+
include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
|
59
|
+
include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
|
60
|
+
include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
|
61
|
+
include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
|
62
|
+
include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
63
|
+
include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
|
64
|
+
include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
65
|
+
|
66
|
+
include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
67
|
+
include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
68
|
+
include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
69
|
+
include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
70
|
+
include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
71
|
+
include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
72
|
+
include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
64
73
|
end
|