regexp_parser 1.7.1 → 1.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +38 -0
- data/README.md +22 -7
- data/lib/regexp_parser/expression.rb +6 -2
- data/lib/regexp_parser/lexer.rb +4 -4
- data/lib/regexp_parser/parser.rb +18 -12
- data/lib/regexp_parser/scanner.rb +1009 -1009
- data/lib/regexp_parser/scanner/scanner.rl +23 -14
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +1 -1
- data/spec/parser/options_spec.rb +28 -0
- data/spec/scanner/escapes_spec.rb +4 -0
- data/spec/scanner/free_space_spec.rb +32 -0
- data/spec/scanner/options_spec.rb +36 -0
- metadata +62 -58
@@ -21,7 +21,7 @@
|
|
21
21
|
set_close = ']';
|
22
22
|
brackets = set_open | set_close;
|
23
23
|
|
24
|
-
comment = ('#' . [^\n]* . '\n');
|
24
|
+
comment = ('#' . [^\n]* . '\n'?);
|
25
25
|
|
26
26
|
class_name_posix = 'alnum' | 'alpha' | 'blank' |
|
27
27
|
'cntrl' | 'digit' | 'graph' |
|
@@ -120,7 +120,7 @@
|
|
120
120
|
|
121
121
|
literal_delimiters = ']' | '}';
|
122
122
|
|
123
|
-
ascii_print = ((0x20..0x7e) - meta_char);
|
123
|
+
ascii_print = ((0x20..0x7e) - meta_char - '#');
|
124
124
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
125
125
|
|
126
126
|
utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
|
@@ -128,7 +128,7 @@
|
|
128
128
|
utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
129
129
|
|
130
130
|
non_literal_escape = char_type_char | anchor_char | escaped_ascii |
|
131
|
-
|
131
|
+
keep_mark | [xucCM];
|
132
132
|
|
133
133
|
non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
|
134
134
|
multi_codepoint_char_type | [0-9cCM];
|
@@ -649,7 +649,9 @@
|
|
649
649
|
if free_spacing
|
650
650
|
emit(:free_space, :comment, *text(data, ts, te))
|
651
651
|
else
|
652
|
-
|
652
|
+
# consume only the pound sign (#) and backtrack to do regular scanning
|
653
|
+
append_literal(data, ts, ts + 1)
|
654
|
+
fexec ts + 1;
|
653
655
|
end
|
654
656
|
};
|
655
657
|
|
@@ -737,21 +739,16 @@ class Regexp::Scanner
|
|
737
739
|
#
|
738
740
|
# This method may raise errors if a syntax error is encountered.
|
739
741
|
# --------------------------------------------------------------------------
|
740
|
-
def self.scan(input_object, &block)
|
741
|
-
new.scan(input_object, &block)
|
742
|
+
def self.scan(input_object, options: nil, &block)
|
743
|
+
new.scan(input_object, options: options, &block)
|
742
744
|
end
|
743
745
|
|
744
|
-
def scan(input_object, &block)
|
746
|
+
def scan(input_object, options: nil, &block)
|
745
747
|
self.literal = nil
|
746
748
|
stack = []
|
747
749
|
|
748
|
-
|
749
|
-
|
750
|
-
self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
|
751
|
-
else
|
752
|
-
input = input_object
|
753
|
-
self.free_spacing = false
|
754
|
-
end
|
750
|
+
input = input_object.is_a?(Regexp) ? input_object.source : input_object
|
751
|
+
self.free_spacing = free_spacing?(input_object, options)
|
755
752
|
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
756
753
|
|
757
754
|
data = input.unpack("c*") if input.is_a?(String)
|
@@ -817,6 +814,18 @@ class Regexp::Scanner
|
|
817
814
|
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
818
815
|
:group_depth, :set_depth, :conditional_stack
|
819
816
|
|
817
|
+
def free_spacing?(input_object, options)
|
818
|
+
if options && !input_object.is_a?(String)
|
819
|
+
raise ArgumentError, 'options cannot be supplied unless scanning a String'
|
820
|
+
end
|
821
|
+
|
822
|
+
options = input_object.options if input_object.is_a?(::Regexp)
|
823
|
+
|
824
|
+
return false unless options
|
825
|
+
|
826
|
+
options & Regexp::EXTENDED != 0
|
827
|
+
end
|
828
|
+
|
820
829
|
def in_group?
|
821
830
|
group_depth > 0
|
822
831
|
end
|
@@ -74,9 +74,9 @@ module Regexp::Syntax
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def warn_if_future_version(const_name)
|
77
|
-
return if comparable_version(const_name) < comparable_version('
|
77
|
+
return if comparable_version(const_name) < comparable_version('4.0.0')
|
78
78
|
|
79
|
-
warn('This library has only been tested up to Ruby
|
79
|
+
warn('This library has only been tested up to Ruby 3.x, '\
|
80
80
|
"but you are running with #{const_get(const_name).inspect}")
|
81
81
|
end
|
82
82
|
end
|
data/regexp_parser.gemspec
CHANGED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to parse') do
|
4
|
+
it 'raises if if parsing from a Regexp and options are passed' do
|
5
|
+
expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
6
|
+
ArgumentError,
|
7
|
+
'options cannot be supplied unless parsing a String'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'sets options if parsing from a String' do
|
12
|
+
root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
|
13
|
+
|
14
|
+
expect(root.options).to eq(m: true, x: true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'allows options to not be supplied when parsing from a Regexp' do
|
18
|
+
root = RP.parse(/a+/ix)
|
19
|
+
|
20
|
+
expect(root.options).to eq(i: true, x: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has an empty option-hash when parsing from a String and passing no options' do
|
24
|
+
root = RP.parse('a+')
|
25
|
+
|
26
|
+
expect(root.options).to be_empty
|
27
|
+
end
|
28
|
+
end
|
@@ -13,6 +13,10 @@ RSpec.describe('Escape scanning') do
|
|
13
13
|
|
14
14
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
15
15
|
|
16
|
+
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
|
+
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
18
|
+
include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
|
19
|
+
|
16
20
|
include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
|
17
21
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
18
22
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
|
|
39
39
|
11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
|
40
40
|
17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
|
41
41
|
29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
|
42
|
+
|
43
|
+
# single line / no trailing newline (c.f. issue #66)
|
44
|
+
include_examples 'scan', /a # b/x,
|
45
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
46
|
+
1 => [:free_space, :whitespace, ' ', 1, 2],
|
47
|
+
2 => [:free_space, :comment, "# b", 2, 5]
|
48
|
+
|
49
|
+
# without spaces (c.f. issue #66)
|
50
|
+
include_examples 'scan', /a#b/x,
|
51
|
+
0 => [:literal, :literal, 'a', 0, 1],
|
52
|
+
1 => [:free_space, :comment, "#b", 1, 3]
|
42
53
|
end
|
43
54
|
|
44
55
|
describe('scan free space inlined') do
|
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
|
|
130
141
|
26 => [:literal, :literal, 'i j', 35, 38],
|
131
142
|
27 => [:group, :close, ')', 38, 39]
|
132
143
|
end
|
144
|
+
|
145
|
+
describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
|
146
|
+
include_examples 'scan', /a#bcd/,
|
147
|
+
0 => [:literal, :literal, 'a#bcd', 0, 5]
|
148
|
+
include_examples 'scan', /a # bcd/,
|
149
|
+
0 => [:literal, :literal, 'a # bcd', 0, 7]
|
150
|
+
|
151
|
+
include_examples 'scan', /a#\d/,
|
152
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
153
|
+
1 => [:type, :digit, '\d', 2, 4]
|
154
|
+
include_examples 'scan', /a # \d/,
|
155
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
156
|
+
1 => [:type, :digit, '\d', 4, 6]
|
157
|
+
|
158
|
+
include_examples 'scan', /a#()/,
|
159
|
+
0 => [:literal, :literal, 'a#', 0, 2],
|
160
|
+
1 => [:group, :capture, '(', 2, 3]
|
161
|
+
include_examples 'scan', /a # ()/,
|
162
|
+
0 => [:literal, :literal, 'a # ', 0, 4],
|
163
|
+
1 => [:group, :capture, '(', 4, 5]
|
164
|
+
end
|
133
165
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to scan') do
|
4
|
+
def expect_type_tokens(tokens, type_tokens)
|
5
|
+
expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'raises if if scanning from a Regexp and options are passed' do
|
9
|
+
expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
10
|
+
ArgumentError,
|
11
|
+
'options cannot be supplied unless scanning a String'
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'sets free_spacing based on options if scanning from a String' do
|
16
|
+
expect_type_tokens(
|
17
|
+
RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
|
18
|
+
[
|
19
|
+
%i[literal literal],
|
20
|
+
%i[quantifier one_or_more],
|
21
|
+
%i[free_space comment]
|
22
|
+
]
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'does not set free_spacing if scanning from a String and passing no options' do
|
27
|
+
expect_type_tokens(
|
28
|
+
RS.scan('a+#c'),
|
29
|
+
[
|
30
|
+
%i[literal literal],
|
31
|
+
%i[quantifier one_or_more],
|
32
|
+
%i[literal literal]
|
33
|
+
]
|
34
|
+
)
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- spec/parser/free_space_spec.rb
|
125
125
|
- spec/parser/groups_spec.rb
|
126
126
|
- spec/parser/keep_spec.rb
|
127
|
+
- spec/parser/options_spec.rb
|
127
128
|
- spec/parser/posix_classes_spec.rb
|
128
129
|
- spec/parser/properties_spec.rb
|
129
130
|
- spec/parser/quantifiers_spec.rb
|
@@ -143,6 +144,7 @@ files:
|
|
143
144
|
- spec/scanner/keep_spec.rb
|
144
145
|
- spec/scanner/literals_spec.rb
|
145
146
|
- spec/scanner/meta_spec.rb
|
147
|
+
- spec/scanner/options_spec.rb
|
146
148
|
- spec/scanner/properties_spec.rb
|
147
149
|
- spec/scanner/quantifiers_spec.rb
|
148
150
|
- spec/scanner/refcalls_spec.rb
|
@@ -176,84 +178,86 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
176
178
|
requirements:
|
177
179
|
- - ">="
|
178
180
|
- !ruby/object:Gem::Version
|
179
|
-
version:
|
181
|
+
version: 2.0.0
|
180
182
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
181
183
|
requirements:
|
182
184
|
- - ">="
|
183
185
|
- !ruby/object:Gem::Version
|
184
186
|
version: '0'
|
185
187
|
requirements: []
|
186
|
-
rubygems_version: 3.1
|
188
|
+
rubygems_version: 3.2.0.rc.1
|
187
189
|
signing_key:
|
188
190
|
specification_version: 4
|
189
191
|
summary: Scanner, lexer, parser for ruby's regular expressions
|
190
192
|
test_files:
|
191
|
-
- spec/
|
192
|
-
- spec/
|
193
|
-
- spec/
|
194
|
-
- spec/
|
193
|
+
- spec/expression/base_spec.rb
|
194
|
+
- spec/expression/clone_spec.rb
|
195
|
+
- spec/expression/conditional_spec.rb
|
196
|
+
- spec/expression/free_space_spec.rb
|
197
|
+
- spec/expression/methods/match_length_spec.rb
|
198
|
+
- spec/expression/methods/match_spec.rb
|
199
|
+
- spec/expression/methods/strfregexp_spec.rb
|
200
|
+
- spec/expression/methods/tests_spec.rb
|
201
|
+
- spec/expression/methods/traverse_spec.rb
|
202
|
+
- spec/expression/options_spec.rb
|
203
|
+
- spec/expression/root_spec.rb
|
204
|
+
- spec/expression/sequence_spec.rb
|
205
|
+
- spec/expression/subexpression_spec.rb
|
206
|
+
- spec/expression/to_h_spec.rb
|
207
|
+
- spec/expression/to_s_spec.rb
|
195
208
|
- spec/lexer/all_spec.rb
|
196
209
|
- spec/lexer/conditionals_spec.rb
|
197
|
-
- spec/lexer/nesting_spec.rb
|
198
210
|
- spec/lexer/delimiters_spec.rb
|
199
|
-
- spec/lexer/
|
211
|
+
- spec/lexer/escapes_spec.rb
|
212
|
+
- spec/lexer/keep_spec.rb
|
200
213
|
- spec/lexer/literals_spec.rb
|
201
|
-
- spec/
|
202
|
-
- spec/
|
203
|
-
- spec/parser/sets_spec.rb
|
204
|
-
- spec/parser/free_space_spec.rb
|
205
|
-
- spec/parser/keep_spec.rb
|
214
|
+
- spec/lexer/nesting_spec.rb
|
215
|
+
- spec/lexer/refcalls_spec.rb
|
206
216
|
- spec/parser/all_spec.rb
|
207
|
-
- spec/parser/conditionals_spec.rb
|
208
|
-
- spec/parser/types_spec.rb
|
209
|
-
- spec/parser/anchors_spec.rb
|
210
217
|
- spec/parser/alternation_spec.rb
|
211
|
-
- spec/parser/
|
212
|
-
- spec/parser/
|
213
|
-
- spec/parser/set/intersections_spec.rb
|
218
|
+
- spec/parser/anchors_spec.rb
|
219
|
+
- spec/parser/conditionals_spec.rb
|
214
220
|
- spec/parser/errors_spec.rb
|
215
|
-
- spec/parser/
|
221
|
+
- spec/parser/escapes_spec.rb
|
222
|
+
- spec/parser/free_space_spec.rb
|
216
223
|
- spec/parser/groups_spec.rb
|
224
|
+
- spec/parser/keep_spec.rb
|
225
|
+
- spec/parser/options_spec.rb
|
226
|
+
- spec/parser/posix_classes_spec.rb
|
227
|
+
- spec/parser/properties_spec.rb
|
217
228
|
- spec/parser/quantifiers_spec.rb
|
218
|
-
- spec/
|
219
|
-
- spec/
|
220
|
-
- spec/
|
221
|
-
- spec/
|
222
|
-
- spec/
|
223
|
-
- spec/expression/methods/match_length_spec.rb
|
224
|
-
- spec/expression/methods/traverse_spec.rb
|
225
|
-
- spec/expression/methods/strfregexp_spec.rb
|
226
|
-
- spec/expression/methods/tests_spec.rb
|
227
|
-
- spec/expression/free_space_spec.rb
|
228
|
-
- spec/expression/options_spec.rb
|
229
|
-
- spec/expression/to_s_spec.rb
|
230
|
-
- spec/expression/root_spec.rb
|
231
|
-
- spec/expression/sequence_spec.rb
|
232
|
-
- spec/expression/clone_spec.rb
|
233
|
-
- spec/expression/to_h_spec.rb
|
234
|
-
- spec/expression/conditional_spec.rb
|
235
|
-
- spec/expression/base_spec.rb
|
236
|
-
- spec/syntax/syntax_spec.rb
|
237
|
-
- spec/syntax/syntax_token_map_spec.rb
|
238
|
-
- spec/syntax/versions/1.9.3_spec.rb
|
239
|
-
- spec/syntax/versions/2.2.0_spec.rb
|
240
|
-
- spec/syntax/versions/1.9.1_spec.rb
|
241
|
-
- spec/syntax/versions/2.0.0_spec.rb
|
242
|
-
- spec/syntax/versions/1.8.6_spec.rb
|
243
|
-
- spec/syntax/versions/aliases_spec.rb
|
244
|
-
- spec/scanner/escapes_spec.rb
|
245
|
-
- spec/scanner/properties_spec.rb
|
246
|
-
- spec/scanner/sets_spec.rb
|
247
|
-
- spec/scanner/free_space_spec.rb
|
248
|
-
- spec/scanner/keep_spec.rb
|
229
|
+
- spec/parser/refcalls_spec.rb
|
230
|
+
- spec/parser/set/intersections_spec.rb
|
231
|
+
- spec/parser/set/ranges_spec.rb
|
232
|
+
- spec/parser/sets_spec.rb
|
233
|
+
- spec/parser/types_spec.rb
|
249
234
|
- spec/scanner/all_spec.rb
|
250
|
-
- spec/scanner/conditionals_spec.rb
|
251
|
-
- spec/scanner/types_spec.rb
|
252
235
|
- spec/scanner/anchors_spec.rb
|
253
|
-
- spec/scanner/
|
254
|
-
- spec/scanner/errors_spec.rb
|
236
|
+
- spec/scanner/conditionals_spec.rb
|
255
237
|
- spec/scanner/delimiters_spec.rb
|
256
|
-
- spec/scanner/
|
238
|
+
- spec/scanner/errors_spec.rb
|
239
|
+
- spec/scanner/escapes_spec.rb
|
240
|
+
- spec/scanner/free_space_spec.rb
|
257
241
|
- spec/scanner/groups_spec.rb
|
242
|
+
- spec/scanner/keep_spec.rb
|
258
243
|
- spec/scanner/literals_spec.rb
|
244
|
+
- spec/scanner/meta_spec.rb
|
245
|
+
- spec/scanner/options_spec.rb
|
246
|
+
- spec/scanner/properties_spec.rb
|
259
247
|
- spec/scanner/quantifiers_spec.rb
|
248
|
+
- spec/scanner/refcalls_spec.rb
|
249
|
+
- spec/scanner/sets_spec.rb
|
250
|
+
- spec/scanner/types_spec.rb
|
251
|
+
- spec/spec_helper.rb
|
252
|
+
- spec/support/runner.rb
|
253
|
+
- spec/support/shared_examples.rb
|
254
|
+
- spec/support/warning_extractor.rb
|
255
|
+
- spec/syntax/syntax_spec.rb
|
256
|
+
- spec/syntax/syntax_token_map_spec.rb
|
257
|
+
- spec/syntax/versions/1.8.6_spec.rb
|
258
|
+
- spec/syntax/versions/1.9.1_spec.rb
|
259
|
+
- spec/syntax/versions/1.9.3_spec.rb
|
260
|
+
- spec/syntax/versions/2.0.0_spec.rb
|
261
|
+
- spec/syntax/versions/2.2.0_spec.rb
|
262
|
+
- spec/syntax/versions/aliases_spec.rb
|
263
|
+
- spec/token/token_spec.rb
|