regexp_parser 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
@@ -1,15 +1,13 @@
|
|
1
1
|
module Regexp::Syntax
|
2
|
-
|
3
2
|
# A syntax that always returns true, passing all tokens as implemented. This
|
4
3
|
# is useful during development, testing, and should be useful for some types
|
5
4
|
# of transformations as well.
|
6
5
|
class Any < Base
|
7
|
-
def initialize
|
8
|
-
@implements = { :* => [
|
6
|
+
def initialize # rubocop:disable Lint/MissingSuper
|
7
|
+
@implements = { :* => %i[*] }
|
9
8
|
end
|
10
9
|
|
11
10
|
def implements?(_type, _token) true end
|
12
11
|
def implements!(_type, _token) true end
|
13
12
|
end
|
14
|
-
|
15
13
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'set'
|
2
2
|
|
3
3
|
module Regexp::Syntax
|
4
|
-
class NotImplementedError < SyntaxError
|
4
|
+
class NotImplementedError < Regexp::Syntax::SyntaxError
|
5
5
|
def initialize(syntax, type, token)
|
6
6
|
super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
|
7
7
|
end
|
@@ -59,7 +59,7 @@ module Regexp::Syntax
|
|
59
59
|
def normalize_group(type, token)
|
60
60
|
case token
|
61
61
|
when :named_ab, :named_sq
|
62
|
-
[
|
62
|
+
%i[group named]
|
63
63
|
else
|
64
64
|
[type, token]
|
65
65
|
end
|
@@ -68,21 +68,21 @@ module Regexp::Syntax
|
|
68
68
|
def normalize_backref(type, token)
|
69
69
|
case token
|
70
70
|
when :name_ref_ab, :name_ref_sq
|
71
|
-
[
|
71
|
+
%i[backref name_ref]
|
72
72
|
when :name_call_ab, :name_call_sq
|
73
|
-
[
|
73
|
+
%i[backref name_call]
|
74
74
|
when :name_recursion_ref_ab, :name_recursion_ref_sq
|
75
|
-
[
|
75
|
+
%i[backref name_recursion_ref]
|
76
76
|
when :number_ref_ab, :number_ref_sq
|
77
|
-
[
|
77
|
+
%i[backref number_ref]
|
78
78
|
when :number_call_ab, :number_call_sq
|
79
|
-
[
|
79
|
+
%i[backref number_call]
|
80
80
|
when :number_rel_ref_ab, :number_rel_ref_sq
|
81
|
-
[
|
81
|
+
%i[backref number_rel_ref]
|
82
82
|
when :number_rel_call_ab, :number_rel_call_sq
|
83
|
-
[
|
83
|
+
%i[backref number_rel_call]
|
84
84
|
when :number_recursion_ref_ab, :number_recursion_ref_sq
|
85
|
-
[
|
85
|
+
%i[backref number_recursion_ref]
|
86
86
|
else
|
87
87
|
[type, token]
|
88
88
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
module Token
|
3
|
+
module Anchor
|
4
|
+
Basic = %i[bol eol]
|
5
|
+
Extended = Basic + %i[word_boundary nonword_boundary]
|
6
|
+
String = %i[bos eos eos_ob_eol]
|
7
|
+
MatchStart = %i[match_start]
|
8
|
+
|
9
|
+
All = Extended + String + MatchStart
|
10
|
+
Type = :anchor
|
11
|
+
end
|
12
|
+
|
13
|
+
Map[Anchor::Type] = Anchor::All
|
14
|
+
end
|
15
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module Assertion
|
4
|
-
Lookahead = [
|
5
|
-
Lookbehind = [
|
4
|
+
Lookahead = %i[lookahead nlookahead]
|
5
|
+
Lookbehind = %i[lookbehind nlookbehind]
|
6
6
|
|
7
7
|
All = Lookahead + Lookbehind
|
8
8
|
Type = :assertion
|
@@ -1,10 +1,11 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module Backreference
|
4
|
-
|
5
|
-
Number = [
|
4
|
+
Plain = %i[number]
|
5
|
+
Number = Plain + %i[number_ref number_rel_ref]
|
6
|
+
Name = %i[name_ref]
|
6
7
|
|
7
|
-
RecursionLevel = [
|
8
|
+
RecursionLevel = %i[name_recursion_ref number_recursion_ref]
|
8
9
|
|
9
10
|
All = Name + Number + RecursionLevel
|
10
11
|
Type = :backref
|
@@ -12,8 +13,8 @@ module Regexp::Syntax
|
|
12
13
|
|
13
14
|
# Type is the same as Backreference so keeping it here, for now.
|
14
15
|
module SubexpressionCall
|
15
|
-
Name = [
|
16
|
-
Number = [
|
16
|
+
Name = %i[name_call]
|
17
|
+
Number = %i[number_call number_rel_call]
|
17
18
|
|
18
19
|
All = Name + Number
|
19
20
|
end
|
@@ -2,10 +2,10 @@ module Regexp::Syntax
|
|
2
2
|
module Token
|
3
3
|
module CharacterType
|
4
4
|
Basic = []
|
5
|
-
Extended = [
|
6
|
-
Hex = [
|
5
|
+
Extended = %i[digit nondigit space nonspace word nonword]
|
6
|
+
Hex = %i[hex nonhex]
|
7
7
|
|
8
|
-
Clustered = [
|
8
|
+
Clustered = %i[linebreak xgrapheme]
|
9
9
|
|
10
10
|
All = Basic + Extended + Hex + Clustered
|
11
11
|
Type = :type
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module Conditional
|
4
|
-
Delimiters = [
|
4
|
+
Delimiters = %i[open close]
|
5
5
|
|
6
|
-
Condition = [
|
7
|
-
Separator = [
|
6
|
+
Condition = %i[condition_open condition condition_close]
|
7
|
+
Separator = %i[separator]
|
8
8
|
|
9
9
|
All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
|
10
10
|
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
module Token
|
3
|
+
# TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
|
4
|
+
module Escape
|
5
|
+
Basic = %i[backslash literal]
|
6
|
+
|
7
|
+
Control = %i[control meta_sequence]
|
8
|
+
|
9
|
+
ASCII = %i[bell backspace escape form_feed newline carriage
|
10
|
+
tab vertical_tab]
|
11
|
+
|
12
|
+
Unicode = %i[codepoint codepoint_list]
|
13
|
+
|
14
|
+
Meta = %i[dot alternation
|
15
|
+
zero_or_one zero_or_more one_or_more
|
16
|
+
bol eol
|
17
|
+
group_open group_close
|
18
|
+
interval_open interval_close
|
19
|
+
set_open set_close]
|
20
|
+
|
21
|
+
Hex = %i[hex]
|
22
|
+
|
23
|
+
Octal = %i[octal]
|
24
|
+
|
25
|
+
All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
|
26
|
+
Type = :escape
|
27
|
+
end
|
28
|
+
|
29
|
+
Map[Escape::Type] = Escape::All
|
30
|
+
end
|
31
|
+
end
|
@@ -1,18 +1,18 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module Group
|
4
|
-
Basic = [
|
5
|
-
Extended = Basic + [
|
4
|
+
Basic = %i[capture close]
|
5
|
+
Extended = Basic + %i[options options_switch]
|
6
6
|
|
7
|
-
Named = [
|
8
|
-
Atomic = [
|
9
|
-
Passive = [
|
10
|
-
Comment = [
|
7
|
+
Named = %i[named]
|
8
|
+
Atomic = %i[atomic]
|
9
|
+
Passive = %i[passive]
|
10
|
+
Comment = %i[comment]
|
11
11
|
|
12
12
|
V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
|
13
13
|
Group::Passive + Group::Comment
|
14
14
|
|
15
|
-
V2_4_1 = [
|
15
|
+
V2_4_1 = %i[absence]
|
16
16
|
|
17
17
|
All = V1_8_6 + V2_4_1
|
18
18
|
Type = :group
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module Regexp::Syntax
|
2
2
|
module Token
|
3
3
|
module PosixClass
|
4
|
-
Standard = [
|
5
|
-
|
4
|
+
Standard = %i[alnum alpha blank cntrl digit graph
|
5
|
+
lower print punct space upper xdigit]
|
6
6
|
|
7
|
-
Extensions = [
|
7
|
+
Extensions = %i[ascii word]
|
8
8
|
|
9
9
|
All = Standard + Extensions
|
10
10
|
Type = :posixclass
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Regexp::Syntax
|
2
|
+
module Token
|
3
|
+
module Quantifier
|
4
|
+
Greedy = %i[
|
5
|
+
zero_or_one
|
6
|
+
zero_or_more
|
7
|
+
one_or_more
|
8
|
+
]
|
9
|
+
|
10
|
+
Reluctant = %i[
|
11
|
+
zero_or_one_reluctant
|
12
|
+
zero_or_more_reluctant
|
13
|
+
one_or_more_reluctant
|
14
|
+
]
|
15
|
+
|
16
|
+
Possessive = %i[
|
17
|
+
zero_or_one_possessive
|
18
|
+
zero_or_more_possessive
|
19
|
+
one_or_more_possessive
|
20
|
+
]
|
21
|
+
|
22
|
+
Interval = %i[interval]
|
23
|
+
IntervalReluctant = %i[interval_reluctant]
|
24
|
+
IntervalPossessive = %i[interval_possessive]
|
25
|
+
|
26
|
+
IntervalAll = Interval + IntervalReluctant +
|
27
|
+
IntervalPossessive
|
28
|
+
|
29
|
+
All = Greedy + Reluctant + Possessive + IntervalAll
|
30
|
+
Type = :quantifier
|
31
|
+
end
|
32
|
+
|
33
|
+
Map[Quantifier::Type] = Quantifier::All
|
34
|
+
end
|
35
|
+
end
|