regexp_parser 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
@@ -0,0 +1,45 @@
|
|
1
|
+
# Define the base module and the simplest of tokens.
|
2
|
+
module Regexp::Syntax
|
3
|
+
module Token
|
4
|
+
Map = {}
|
5
|
+
|
6
|
+
module Literal
|
7
|
+
All = %i[literal]
|
8
|
+
Type = :literal
|
9
|
+
end
|
10
|
+
|
11
|
+
module FreeSpace
|
12
|
+
All = %i[comment whitespace]
|
13
|
+
Type = :free_space
|
14
|
+
end
|
15
|
+
|
16
|
+
Map[FreeSpace::Type] = FreeSpace::All
|
17
|
+
Map[Literal::Type] = Literal::All
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
# Load all the token files, they will populate the Map constant.
|
23
|
+
require 'regexp_parser/syntax/token/anchor'
|
24
|
+
require 'regexp_parser/syntax/token/assertion'
|
25
|
+
require 'regexp_parser/syntax/token/backreference'
|
26
|
+
require 'regexp_parser/syntax/token/posix_class'
|
27
|
+
require 'regexp_parser/syntax/token/character_set'
|
28
|
+
require 'regexp_parser/syntax/token/character_type'
|
29
|
+
require 'regexp_parser/syntax/token/conditional'
|
30
|
+
require 'regexp_parser/syntax/token/escape'
|
31
|
+
require 'regexp_parser/syntax/token/group'
|
32
|
+
require 'regexp_parser/syntax/token/keep'
|
33
|
+
require 'regexp_parser/syntax/token/meta'
|
34
|
+
require 'regexp_parser/syntax/token/quantifier'
|
35
|
+
require 'regexp_parser/syntax/token/unicode_property'
|
36
|
+
|
37
|
+
|
38
|
+
# After loading all the tokens the map is full. Extract all tokens and types
|
39
|
+
# into the All and Types constants.
|
40
|
+
module Regexp::Syntax
|
41
|
+
module Token
|
42
|
+
All = Map.values.flatten.uniq.sort.freeze
|
43
|
+
Types = Map.keys.freeze
|
44
|
+
end
|
45
|
+
end
|
@@ -3,13 +3,13 @@ module Regexp::Syntax
|
|
3
3
|
VERSION_REGEXP = /#{VERSION_FORMAT}/
|
4
4
|
VERSION_CONST_REGEXP = /\AV\d+_\d+(?:_\d+)?\z/
|
5
5
|
|
6
|
-
class InvalidVersionNameError < SyntaxError
|
6
|
+
class InvalidVersionNameError < Regexp::Syntax::SyntaxError
|
7
7
|
def initialize(name)
|
8
8
|
super "Invalid version name '#{name}'. Expected format is '#{VERSION_FORMAT}'"
|
9
9
|
end
|
10
10
|
end
|
11
11
|
|
12
|
-
class UnknownSyntaxNameError < SyntaxError
|
12
|
+
class UnknownSyntaxNameError < Regexp::Syntax::SyntaxError
|
13
13
|
def initialize(name)
|
14
14
|
super "Unknown syntax name '#{name}'."
|
15
15
|
end
|
@@ -5,7 +5,7 @@ module Regexp::Syntax
|
|
5
5
|
|
6
6
|
implements :anchor, Anchor::All
|
7
7
|
implements :assertion, Assertion::Lookahead
|
8
|
-
implements :backref,
|
8
|
+
implements :backref, Backreference::Plain
|
9
9
|
implements :posixclass, PosixClass::Standard
|
10
10
|
implements :group, Group::All
|
11
11
|
implements :meta, Meta::Extended
|
data/lib/regexp_parser/syntax.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require File.expand_path('../syntax/base', __FILE__)
|
3
|
-
require File.expand_path('../syntax/any', __FILE__)
|
4
|
-
require File.expand_path('../syntax/version_lookup', __FILE__)
|
5
|
-
require File.expand_path('../syntax/versions', __FILE__)
|
1
|
+
require 'regexp_parser/error'
|
6
2
|
|
7
3
|
module Regexp::Syntax
|
8
|
-
class SyntaxError <
|
4
|
+
class SyntaxError < Regexp::Parser::Error; end
|
9
5
|
end
|
6
|
+
|
7
|
+
require_relative 'syntax/token'
|
8
|
+
require_relative 'syntax/base'
|
9
|
+
require_relative 'syntax/any'
|
10
|
+
require_relative 'syntax/version_lookup'
|
11
|
+
require_relative 'syntax/versions'
|
data/lib/regexp_parser/token.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
class Regexp
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
:conditional_level
|
2
|
+
TOKEN_KEYS = %i[
|
3
|
+
type
|
4
|
+
token
|
5
|
+
text
|
6
|
+
ts
|
7
|
+
te
|
8
|
+
level
|
9
|
+
set_level
|
10
|
+
conditional_level
|
12
11
|
].freeze
|
13
12
|
|
14
13
|
Token = Struct.new(*TOKEN_KEYS) do
|
@@ -21,15 +20,5 @@ class Regexp
|
|
21
20
|
def length
|
22
21
|
te - ts
|
23
22
|
end
|
24
|
-
|
25
|
-
if RUBY_VERSION < '2.0.0'
|
26
|
-
def to_h
|
27
|
-
members.inject({}) do |hash, member|
|
28
|
-
hash[member.to_sym] = self[member]
|
29
|
-
hash
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
23
|
end
|
34
|
-
|
35
24
|
end
|
data/lib/regexp_parser.rb
CHANGED
@@ -27,8 +27,8 @@ RSpec.describe('Expression#clone') do
|
|
27
27
|
expect(root_2.quantifier.object_id).not_to eq copy_2.quantifier.object_id
|
28
28
|
|
29
29
|
# regression test
|
30
|
-
expect { root_2.clone }.not_to
|
31
|
-
expect { root_2.clone }.not_to
|
30
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.object_id })
|
31
|
+
expect { root_2.clone }.not_to(change { root_2.quantifier.text.object_id })
|
32
32
|
end
|
33
33
|
|
34
34
|
specify('Subexpression#clone') do
|
@@ -48,7 +48,7 @@ RSpec.describe('Expression#clone') do
|
|
48
48
|
end
|
49
49
|
|
50
50
|
# regression test
|
51
|
-
expect { root.clone }.not_to
|
51
|
+
expect { root.clone }.not_to(change { root.expressions.object_id })
|
52
52
|
end
|
53
53
|
|
54
54
|
specify('Group::Named#clone') do
|
@@ -69,7 +69,39 @@ RSpec.describe('Expression#clone') do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# regression test
|
72
|
-
expect { root_1.clone }.not_to
|
72
|
+
expect { root_1.clone }.not_to(change { root_1.name.object_id })
|
73
|
+
end
|
74
|
+
|
75
|
+
specify('Group::Options#clone') do
|
76
|
+
root = RP.parse('foo(?i)bar')
|
77
|
+
copy = root.clone
|
78
|
+
|
79
|
+
expect(copy.to_s).to eq root.to_s
|
80
|
+
|
81
|
+
root_1 = root[1]
|
82
|
+
copy_1 = copy[1]
|
83
|
+
|
84
|
+
expect(root_1.option_changes).to eq copy_1.option_changes
|
85
|
+
expect(root_1.option_changes.object_id).not_to eq copy_1.option_changes.object_id
|
86
|
+
|
87
|
+
# regression test
|
88
|
+
expect { root_1.clone }.not_to(change { root_1.option_changes.object_id })
|
89
|
+
end
|
90
|
+
|
91
|
+
specify('Backreference::Base#clone') do
|
92
|
+
root = RP.parse('(foo)\1')
|
93
|
+
copy = root.clone
|
94
|
+
|
95
|
+
expect(copy.to_s).to eq root.to_s
|
96
|
+
|
97
|
+
root_1 = root[1]
|
98
|
+
copy_1 = copy[1]
|
99
|
+
|
100
|
+
expect(root_1.referenced_expression.to_s).to eq copy_1.referenced_expression.to_s
|
101
|
+
expect(root_1.referenced_expression.object_id).not_to eq copy_1.referenced_expression.object_id
|
102
|
+
|
103
|
+
# regression test
|
104
|
+
expect { root_1.clone }.not_to(change { root_1.referenced_expression.object_id })
|
73
105
|
end
|
74
106
|
|
75
107
|
specify('Sequence#clone') do
|
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
10
10
|
space = root[0]
|
11
11
|
|
12
12
|
expect(space).to be_instance_of(FreeSpace::WhiteSpace)
|
13
|
-
expect { space.quantify(:dummy, '#') }.to raise_error(
|
13
|
+
expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
14
14
|
end
|
15
15
|
|
16
16
|
specify('comment quantify raises error') do
|
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
|
|
22
22
|
comment = root[3]
|
23
23
|
|
24
24
|
expect(comment).to be_instance_of(FreeSpace::Comment)
|
25
|
-
expect { comment.quantify(:dummy, '#') }.to raise_error(
|
25
|
+
expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
|
26
26
|
end
|
27
27
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
|
4
|
-
ML = described_class
|
3
|
+
ML = Regexp::MatchLength
|
5
4
|
|
5
|
+
RSpec.describe(Regexp::MatchLength) do
|
6
6
|
specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
|
7
7
|
specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
|
8
8
|
specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
|
data/spec/lexer/nesting_spec.rb
CHANGED
@@ -59,7 +59,7 @@ RSpec.describe('Nesting lexing') do
|
|
59
59
|
4 => [:literal, :literal, 'e', 4, 5, 0, 1, 0],
|
60
60
|
5 => [:set, :close, ']', 5, 6, 0, 0, 0]
|
61
61
|
|
62
|
-
include_examples 'lex',
|
62
|
+
include_examples 'lex', '[[:word:]&&[^c]z]',
|
63
63
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
64
64
|
1 => [:posixclass, :word, '[:word:]', 1, 9, 0, 1, 0],
|
65
65
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
@@ -70,7 +70,7 @@ RSpec.describe('Nesting lexing') do
|
|
70
70
|
7 => [:literal, :literal, 'z', 15, 16, 0, 1, 0],
|
71
71
|
8 => [:set, :close, ']', 16, 17, 0, 0, 0]
|
72
72
|
|
73
|
-
include_examples 'lex',
|
73
|
+
include_examples 'lex', '[\p{word}&&[^c]z]',
|
74
74
|
0 => [:set, :open, '[', 0, 1, 0, 0, 0],
|
75
75
|
1 => [:property, :word, '\p{word}', 1, 9, 0, 1, 0],
|
76
76
|
2 => [:set, :intersection, '&&', 9, 11, 0, 1, 0],
|
data/spec/lexer/refcalls_spec.rb
CHANGED
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
|
|
32
32
|
include_examples 'lex', "(abc)\\g'1'",
|
33
33
|
3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
|
34
34
|
|
35
|
+
include_examples 'lex', '\g<0>',
|
36
|
+
0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
|
37
|
+
include_examples 'lex', "\\g'0'",
|
38
|
+
0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
|
39
|
+
|
35
40
|
include_examples 'lex', '(abc)\g<-1>',
|
36
41
|
3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
|
37
42
|
include_examples 'lex', "(abc)\\g'-1'",
|
data/spec/parser/all_spec.rb
CHANGED
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
|
|
34
34
|
end
|
35
35
|
|
36
36
|
specify('parse no quantifier target raises error') do
|
37
|
-
expect { RP.parse('?abc') }.to raise_error(
|
37
|
+
expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
|
38
38
|
end
|
39
39
|
|
40
40
|
specify('parse sequence no quantifier target raises error') do
|
41
|
-
expect { RP.parse('abc|?def') }.to raise_error(
|
41
|
+
expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
|
42
42
|
end
|
43
43
|
end
|
data/spec/parser/escapes_spec.rb
CHANGED
@@ -56,8 +56,20 @@ RSpec.describe('EscapeSequence parsing') do
|
|
56
56
|
expect { root[5].codepoint }.to raise_error(/#codepoints/)
|
57
57
|
end
|
58
58
|
|
59
|
+
# Meta/control escapes
|
60
|
+
#
|
61
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
62
|
+
# escapes can only be set with the Regexp::new constructor.
|
63
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
64
|
+
#
|
65
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
66
|
+
def parse_meta_control(regexp_body)
|
67
|
+
regexp = Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n')
|
68
|
+
RP.parse(regexp)
|
69
|
+
end
|
70
|
+
|
59
71
|
specify('parse escape control sequence lower') do
|
60
|
-
root =
|
72
|
+
root = parse_meta_control('a\\\\\c2b')
|
61
73
|
|
62
74
|
expect(root[2]).to be_instance_of(EscapeSequence::Control)
|
63
75
|
expect(root[2].text).to eq '\\c2'
|
@@ -66,56 +78,56 @@ RSpec.describe('EscapeSequence parsing') do
|
|
66
78
|
end
|
67
79
|
|
68
80
|
specify('parse escape control sequence upper') do
|
69
|
-
root =
|
81
|
+
root = parse_meta_control('\d\C-C\w')
|
70
82
|
|
71
|
-
expect(root[
|
72
|
-
expect(root[
|
73
|
-
expect(root[
|
74
|
-
expect(root[
|
83
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Control)
|
84
|
+
expect(root[1].text).to eq '\\C-C'
|
85
|
+
expect(root[1].char).to eq "\x03"
|
86
|
+
expect(root[1].codepoint).to eq 3
|
75
87
|
end
|
76
88
|
|
77
89
|
specify('parse escape meta sequence') do
|
78
|
-
root =
|
90
|
+
root = parse_meta_control('\Z\M-Z')
|
79
91
|
|
80
|
-
expect(root[
|
81
|
-
expect(root[
|
82
|
-
expect(root[
|
83
|
-
expect(root[
|
92
|
+
expect(root[1]).to be_instance_of(EscapeSequence::Meta)
|
93
|
+
expect(root[1].text).to eq '\\M-Z'
|
94
|
+
expect(root[1].char).to eq "\u00DA"
|
95
|
+
expect(root[1].codepoint).to eq 218
|
84
96
|
end
|
85
97
|
|
86
98
|
specify('parse escape meta control sequence') do
|
87
|
-
root =
|
99
|
+
root = parse_meta_control('\A\M-\C-X')
|
88
100
|
|
89
|
-
expect(root[
|
90
|
-
expect(root[
|
91
|
-
expect(root[
|
92
|
-
expect(root[
|
101
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
102
|
+
expect(root[1].text).to eq '\\M-\\C-X'
|
103
|
+
expect(root[1].char).to eq "\u0098"
|
104
|
+
expect(root[1].codepoint).to eq 152
|
93
105
|
end
|
94
106
|
|
95
107
|
specify('parse lower c meta control sequence') do
|
96
|
-
root =
|
108
|
+
root = parse_meta_control('\A\M-\cX')
|
97
109
|
|
98
|
-
expect(root[
|
99
|
-
expect(root[
|
100
|
-
expect(root[
|
101
|
-
expect(root[
|
110
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
111
|
+
expect(root[1].text).to eq '\\M-\\cX'
|
112
|
+
expect(root[1].char).to eq "\u0098"
|
113
|
+
expect(root[1].codepoint).to eq 152
|
102
114
|
end
|
103
115
|
|
104
116
|
specify('parse escape reverse meta control sequence') do
|
105
|
-
root =
|
117
|
+
root = parse_meta_control('\A\C-\M-X')
|
106
118
|
|
107
|
-
expect(root[
|
108
|
-
expect(root[
|
109
|
-
expect(root[
|
110
|
-
expect(root[
|
119
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
120
|
+
expect(root[1].text).to eq '\\C-\\M-X'
|
121
|
+
expect(root[1].char).to eq "\u0098"
|
122
|
+
expect(root[1].codepoint).to eq 152
|
111
123
|
end
|
112
124
|
|
113
125
|
specify('parse escape reverse lower c meta control sequence') do
|
114
|
-
root =
|
126
|
+
root = parse_meta_control('\A\c\M-X')
|
115
127
|
|
116
|
-
expect(root[
|
117
|
-
expect(root[
|
118
|
-
expect(root[
|
119
|
-
expect(root[
|
128
|
+
expect(root[1]).to be_instance_of(EscapeSequence::MetaControl)
|
129
|
+
expect(root[1].text).to eq '\\c\\M-X'
|
130
|
+
expect(root[1].char).to eq "\u0098"
|
131
|
+
expect(root[1].codepoint).to eq 152
|
120
132
|
end
|
121
133
|
end
|
@@ -37,11 +37,13 @@ RSpec.describe('Property parsing') do
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
40
|
+
if ruby_version_at_least('2.7.0')
|
41
|
+
specify('parse all properties of current ruby') do
|
42
|
+
unsupported = RegexpPropertyValues.all_for_current_ruby.reject do |prop|
|
43
|
+
RP.parse("\\p{#{prop}}") rescue false
|
44
|
+
end
|
45
|
+
expect(unsupported).to be_empty
|
43
46
|
end
|
44
|
-
expect(unsupported).to be_empty
|
45
47
|
end
|
46
48
|
|
47
49
|
specify('parse property negative') do
|
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
|
|
29
29
|
include_examples 'parse', /(abc)\g'1'/,
|
30
30
|
1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
|
31
31
|
|
32
|
+
include_examples 'parse', '\g<0>',
|
33
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
34
|
+
include_examples 'parse', "\\g'0'",
|
35
|
+
0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
|
36
|
+
|
32
37
|
include_examples 'parse', /(abc)\g<-1>/,
|
33
38
|
1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
|
34
39
|
include_examples 'parse', /(abc)\g'-1'/,
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe('CharacterSet::Range parsing') do
|
4
|
+
# Some edge-case patterns are evaluated with #match to make sure that
|
5
|
+
# their behavior still reflects the way they are parsed.
|
6
|
+
# #capturing_stderr is used to skip any warnings generated by this.
|
7
|
+
|
4
8
|
specify('parse set range') do
|
5
9
|
root = RP.parse('[a-z]')
|
6
10
|
set = root[0]
|
@@ -13,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
13
17
|
expect(range.first).to be_instance_of(Literal)
|
14
18
|
expect(range.last.to_s).to eq 'z'
|
15
19
|
expect(range.last).to be_instance_of(Literal)
|
16
|
-
expect(set).to match 'm'
|
20
|
+
capturing_stderr { expect(set).to match 'm' }
|
17
21
|
end
|
18
22
|
|
19
23
|
specify('parse set range hex') do
|
@@ -28,7 +32,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
28
32
|
expect(range.first).to be_instance_of(EscapeSequence::Hex)
|
29
33
|
expect(range.last.to_s).to eq '\\x22'
|
30
34
|
expect(range.last).to be_instance_of(EscapeSequence::Hex)
|
31
|
-
expect(set).to match "\x11"
|
35
|
+
capturing_stderr { expect(set).to match "\x11" }
|
32
36
|
end
|
33
37
|
|
34
38
|
specify('parse set range unicode') do
|
@@ -43,7 +47,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
43
47
|
expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
|
44
48
|
expect(range.last.to_s).to eq '\\u1234'
|
45
49
|
expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
|
46
|
-
expect(set).to match '\\u600'
|
50
|
+
capturing_stderr { expect(set).to match '\\u600' }
|
47
51
|
end
|
48
52
|
|
49
53
|
specify('parse set range edge case leading dash') do
|
@@ -53,7 +57,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
53
57
|
|
54
58
|
expect(set.count).to eq 1
|
55
59
|
expect(range.count).to eq 2
|
56
|
-
expect(set).to match 'a'
|
60
|
+
capturing_stderr { expect(set).to match 'a' }
|
57
61
|
end
|
58
62
|
|
59
63
|
specify('parse set range edge case trailing dash') do
|
@@ -63,7 +67,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
63
67
|
|
64
68
|
expect(set.count).to eq 1
|
65
69
|
expect(range.count).to eq 2
|
66
|
-
expect(set).to match '$'
|
70
|
+
capturing_stderr { expect(set).to match '$' }
|
67
71
|
end
|
68
72
|
|
69
73
|
specify('parse set range edge case leading negate') do
|
@@ -71,8 +75,10 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
71
75
|
set = root[0]
|
72
76
|
|
73
77
|
expect(set.count).to eq 2
|
74
|
-
|
75
|
-
|
78
|
+
capturing_stderr do
|
79
|
+
expect(set).to match 'a'
|
80
|
+
expect(set).not_to match 'z'
|
81
|
+
end
|
76
82
|
end
|
77
83
|
|
78
84
|
specify('parse set range edge case trailing negate') do
|
@@ -82,7 +88,7 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
82
88
|
|
83
89
|
expect(set.count).to eq 1
|
84
90
|
expect(range.count).to eq 2
|
85
|
-
expect(set).to match '$'
|
91
|
+
capturing_stderr { expect(set).to match '$' }
|
86
92
|
end
|
87
93
|
|
88
94
|
specify('parse set range edge case leading intersection') do
|
@@ -91,10 +97,12 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
91
97
|
|
92
98
|
expect(set.count).to eq 1
|
93
99
|
expect(set.first.last.to_s).to eq '-bc'
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
100
|
+
capturing_stderr do
|
101
|
+
expect(set).to match '-'
|
102
|
+
expect(set).to match 'b'
|
103
|
+
expect(set).not_to match 'a'
|
104
|
+
expect(set).not_to match 'c'
|
105
|
+
end
|
98
106
|
end
|
99
107
|
|
100
108
|
specify('parse set range edge case trailing intersection') do
|
@@ -103,9 +111,11 @@ RSpec.describe('CharacterSet::Range parsing') do
|
|
103
111
|
|
104
112
|
expect(set.count).to eq 1
|
105
113
|
expect(set.first.first.to_s).to eq 'bc-'
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
114
|
+
capturing_stderr do
|
115
|
+
expect(set).to match '-'
|
116
|
+
expect(set).to match 'b'
|
117
|
+
expect(set).not_to match 'a'
|
118
|
+
expect(set).not_to match 'c'
|
119
|
+
end
|
110
120
|
end
|
111
121
|
end
|
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
|
|
4
4
|
include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
|
5
5
|
|
6
6
|
# not an escape outside a character set
|
7
|
-
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1,
|
7
|
+
include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
|
8
8
|
|
9
9
|
include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
|
10
10
|
include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
|
@@ -35,25 +35,6 @@ RSpec.describe('Escape scanning') do
|
|
35
35
|
include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
|
36
36
|
include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
|
37
37
|
|
38
|
-
include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
|
39
|
-
include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
|
40
|
-
include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
|
41
|
-
include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
|
42
|
-
include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
|
43
|
-
include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
|
44
|
-
include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
|
45
|
-
include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
46
|
-
include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
|
47
|
-
include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
48
|
-
|
49
|
-
include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
50
|
-
include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
51
|
-
include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
52
|
-
include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
53
|
-
include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
54
|
-
include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
55
|
-
include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
56
|
-
|
57
38
|
include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
58
39
|
include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
59
40
|
include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
|
@@ -61,4 +42,32 @@ RSpec.describe('Escape scanning') do
|
|
61
42
|
include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
|
62
43
|
include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
|
63
44
|
include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
|
45
|
+
|
46
|
+
# Meta/control escapes
|
47
|
+
#
|
48
|
+
# After the following fix in Ruby 3.1, a Regexp#source containing meta/control
|
49
|
+
# escapes can only be set with the Regexp::new constructor.
|
50
|
+
# In Regexp literals, these escapes are now pre-processed to hex escapes.
|
51
|
+
#
|
52
|
+
# https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9
|
53
|
+
n = ->(regexp_body){ Regexp.new(regexp_body.force_encoding('ascii-8bit'), 'n') }
|
54
|
+
|
55
|
+
include_examples 'scan', 'a\cBc', 1 => [:escape, :control, '\cB', 1, 4]
|
56
|
+
include_examples 'scan', 'a\c^c', 1 => [:escape, :control, '\c^', 1, 4]
|
57
|
+
include_examples 'scan', 'a\c\n', 1 => [:escape, :control, '\c\n', 1, 5]
|
58
|
+
include_examples 'scan', 'a\c\\\\b', 1 => [:escape, :control, '\c\\\\', 1, 5]
|
59
|
+
include_examples 'scan', 'a\C-bc', 1 => [:escape, :control, '\C-b', 1, 5]
|
60
|
+
include_examples 'scan', 'a\C-^b', 1 => [:escape, :control, '\C-^', 1, 5]
|
61
|
+
include_examples 'scan', 'a\C-\nb', 1 => [:escape, :control, '\C-\n', 1, 6]
|
62
|
+
include_examples 'scan', 'a\C-\\\\b', 1 => [:escape, :control, '\C-\\\\', 1, 6]
|
63
|
+
include_examples 'scan', n.('a\c\M-Bc'), 1 => [:escape, :control, '\c\M-B', 1, 7]
|
64
|
+
include_examples 'scan', n.('a\C-\M-Bc'), 1 => [:escape, :control, '\C-\M-B', 1, 8]
|
65
|
+
|
66
|
+
include_examples 'scan', n.('a\M-Bc'), 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
|
67
|
+
include_examples 'scan', n.('a\M-\cBc'), 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
|
68
|
+
include_examples 'scan', n.('a\M-\c^'), 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
|
69
|
+
include_examples 'scan', n.('a\M-\c\n'), 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
|
70
|
+
include_examples 'scan', n.('a\M-\c\\\\'), 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
|
71
|
+
include_examples 'scan', n.('a\M-\C-Bc'), 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
|
72
|
+
include_examples 'scan', n.('a\M-\C-\\\\'), 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
|
64
73
|
end
|