regexp_parser 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
@@ -1,129 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class Base
|
4
|
-
attr_accessor :type, :token
|
5
|
-
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
|
-
|
8
|
-
attr_accessor :quantifier
|
9
|
-
attr_accessor :options
|
10
|
-
|
11
|
-
def initialize(token, options = {})
|
12
|
-
self.type = token.type
|
13
|
-
self.token = token.token
|
14
|
-
self.text = token.text
|
15
|
-
self.ts = token.ts
|
16
|
-
self.level = token.level
|
17
|
-
self.set_level = token.set_level
|
18
|
-
self.conditional_level = token.conditional_level
|
19
|
-
self.nesting_level = 0
|
20
|
-
self.quantifier = nil
|
21
|
-
self.options = options
|
22
|
-
end
|
23
|
-
|
24
|
-
def initialize_clone(orig)
|
25
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
|
-
super
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_re(format = :full)
|
32
|
-
::Regexp.new(to_s(format))
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :starts_at :ts
|
36
|
-
|
37
|
-
def base_length
|
38
|
-
to_s(:base).length
|
39
|
-
end
|
40
|
-
|
41
|
-
def full_length
|
42
|
-
to_s.length
|
43
|
-
end
|
44
|
-
|
45
|
-
def offset
|
46
|
-
[starts_at, full_length]
|
47
|
-
end
|
48
|
-
|
49
|
-
def coded_offset
|
50
|
-
'@%d+%d' % offset
|
51
|
-
end
|
52
|
-
|
53
|
-
def to_s(format = :full)
|
54
|
-
"#{text}#{quantifier_affix(format)}"
|
55
|
-
end
|
56
|
-
|
57
|
-
def quantifier_affix(expression_format)
|
58
|
-
quantifier.to_s if quantified? && expression_format != :base
|
59
|
-
end
|
60
|
-
|
61
|
-
def terminal?
|
62
|
-
!respond_to?(:expressions)
|
63
|
-
end
|
64
|
-
|
65
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
66
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
67
|
-
end
|
68
|
-
|
69
|
-
def unquantified_clone
|
70
|
-
clone.tap { |exp| exp.quantifier = nil }
|
71
|
-
end
|
72
|
-
|
73
|
-
def quantified?
|
74
|
-
!quantifier.nil?
|
75
|
-
end
|
76
|
-
|
77
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
78
|
-
def quantity
|
79
|
-
return [nil,nil] unless quantified?
|
80
|
-
[quantifier.min, quantifier.max]
|
81
|
-
end
|
82
|
-
|
83
|
-
def repetitions
|
84
|
-
return 1..1 unless quantified?
|
85
|
-
min = quantifier.min
|
86
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
87
|
-
range = min..max
|
88
|
-
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
89
|
-
if RUBY_VERSION.to_f < 2.7
|
90
|
-
range.define_singleton_method(:minmax) { [min, max] }
|
91
|
-
end
|
92
|
-
range
|
93
|
-
end
|
94
|
-
|
95
|
-
def greedy?
|
96
|
-
quantified? and quantifier.greedy?
|
97
|
-
end
|
98
|
-
|
99
|
-
def reluctant?
|
100
|
-
quantified? and quantifier.reluctant?
|
101
|
-
end
|
102
|
-
alias :lazy? :reluctant?
|
103
|
-
|
104
|
-
def possessive?
|
105
|
-
quantified? and quantifier.possessive?
|
106
|
-
end
|
107
|
-
|
108
|
-
def attributes
|
109
|
-
{
|
110
|
-
type: type,
|
111
|
-
token: token,
|
112
|
-
text: to_s(:base),
|
113
|
-
starts_at: ts,
|
114
|
-
length: full_length,
|
115
|
-
level: level,
|
116
|
-
set_level: set_level,
|
117
|
-
conditional_level: conditional_level,
|
118
|
-
options: options,
|
119
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
120
|
-
}
|
121
|
-
end
|
122
|
-
alias :to_h :attributes
|
123
|
-
end
|
124
|
-
|
125
|
-
end # module Regexp::Expression
|
1
|
+
require 'regexp_parser/error'
|
126
2
|
|
3
|
+
require 'regexp_parser/expression/base'
|
127
4
|
require 'regexp_parser/expression/quantifier'
|
128
5
|
require 'regexp_parser/expression/subexpression'
|
129
6
|
require 'regexp_parser/expression/sequence'
|
@@ -131,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
|
|
131
8
|
|
132
9
|
require 'regexp_parser/expression/classes/alternation'
|
133
10
|
require 'regexp_parser/expression/classes/anchor'
|
134
|
-
require 'regexp_parser/expression/classes/backref'
|
11
|
+
require 'regexp_parser/expression/classes/backreference'
|
12
|
+
require 'regexp_parser/expression/classes/character_set'
|
13
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
135
15
|
require 'regexp_parser/expression/classes/conditional'
|
136
|
-
require 'regexp_parser/expression/classes/escape'
|
16
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
137
17
|
require 'regexp_parser/expression/classes/free_space'
|
138
18
|
require 'regexp_parser/expression/classes/group'
|
139
19
|
require 'regexp_parser/expression/classes/keep'
|
@@ -141,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
|
|
141
21
|
require 'regexp_parser/expression/classes/posix_class'
|
142
22
|
require 'regexp_parser/expression/classes/property'
|
143
23
|
require 'regexp_parser/expression/classes/root'
|
144
|
-
require 'regexp_parser/expression/classes/set'
|
145
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
146
|
-
require 'regexp_parser/expression/classes/set/range'
|
147
24
|
require 'regexp_parser/expression/classes/type'
|
148
25
|
|
149
26
|
require 'regexp_parser/expression/methods/match'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,12 +4,14 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [:close].freeze
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
|
+
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
13
15
|
|
14
16
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
15
17
|
new.lex(input, syntax, options: options, &block)
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|