regexp_parser 1.7.1 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +12 -1
- data/lib/regexp_parser/lexer.rb +4 -4
- data/lib/regexp_parser/parser.rb +18 -12
- data/lib/regexp_parser/scanner.rb +1002 -1008
- data/lib/regexp_parser/scanner/scanner.rl +20 -13
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/parser/free_space_spec.rb +25 -4
- data/spec/parser/options_spec.rb +28 -0
- data/spec/scanner/escapes_spec.rb +4 -0
- data/spec/scanner/options_spec.rb +36 -0
- metadata +6 -2
@@ -21,7 +21,7 @@
|
|
21
21
|
set_close = ']';
|
22
22
|
brackets = set_open | set_close;
|
23
23
|
|
24
|
-
comment = ('#' . [^\n]* . '\n');
|
24
|
+
comment = ('#' . [^\n]* . '\n'?);
|
25
25
|
|
26
26
|
class_name_posix = 'alnum' | 'alpha' | 'blank' |
|
27
27
|
'cntrl' | 'digit' | 'graph' |
|
@@ -120,7 +120,7 @@
|
|
120
120
|
|
121
121
|
literal_delimiters = ']' | '}';
|
122
122
|
|
123
|
-
ascii_print = ((0x20..0x7e) - meta_char);
|
123
|
+
ascii_print = ((0x20..0x7e) - meta_char - '#');
|
124
124
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
125
125
|
|
126
126
|
utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
|
@@ -128,7 +128,7 @@
|
|
128
128
|
utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
129
129
|
|
130
130
|
non_literal_escape = char_type_char | anchor_char | escaped_ascii |
|
131
|
-
|
131
|
+
keep_mark | [xucCM];
|
132
132
|
|
133
133
|
non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
|
134
134
|
multi_codepoint_char_type | [0-9cCM];
|
@@ -737,21 +737,16 @@ class Regexp::Scanner
|
|
737
737
|
#
|
738
738
|
# This method may raise errors if a syntax error is encountered.
|
739
739
|
# --------------------------------------------------------------------------
|
740
|
-
def self.scan(input_object, &block)
|
741
|
-
new.scan(input_object, &block)
|
740
|
+
def self.scan(input_object, options: nil, &block)
|
741
|
+
new.scan(input_object, options: options, &block)
|
742
742
|
end
|
743
743
|
|
744
|
-
def scan(input_object, &block)
|
744
|
+
def scan(input_object, options: nil, &block)
|
745
745
|
self.literal = nil
|
746
746
|
stack = []
|
747
747
|
|
748
|
-
if input_object.is_a?(Regexp)
|
749
|
-
input = input_object.source
|
750
|
-
self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
|
751
|
-
else
|
752
|
-
input = input_object
|
753
|
-
self.free_spacing = false
|
754
|
-
end
|
748
|
+
input = input_object.is_a?(Regexp) ? input_object.source : input_object
|
749
|
+
self.free_spacing = free_spacing?(input_object, options)
|
755
750
|
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
756
751
|
|
757
752
|
data = input.unpack("c*") if input.is_a?(String)
|
@@ -817,6 +812,18 @@ class Regexp::Scanner
|
|
817
812
|
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
818
813
|
:group_depth, :set_depth, :conditional_stack
|
819
814
|
|
815
|
+
def free_spacing?(input_object, options)
|
816
|
+
if options && !input_object.is_a?(String)
|
817
|
+
raise ArgumentError, 'options cannot be supplied unless scanning a String'
|
818
|
+
end
|
819
|
+
|
820
|
+
options = input_object.options if input_object.is_a?(::Regexp)
|
821
|
+
|
822
|
+
return false unless options
|
823
|
+
|
824
|
+
options & Regexp::EXTENDED != 0
|
825
|
+
end
|
826
|
+
|
820
827
|
def in_group?
|
821
828
|
group_depth > 0
|
822
829
|
end
|
@@ -24,13 +24,34 @@ RSpec.describe('FreeSpace parsing') do
|
|
24
24
|
expect(root.first.text).to eq 'a b c d'
|
25
25
|
end
|
26
26
|
|
27
|
+
specify('parse single-line free space comments without spaces') do
|
28
|
+
regexp = /a#b/x
|
29
|
+
|
30
|
+
root = RP.parse(regexp)
|
31
|
+
expect(root.length).to eq 2
|
32
|
+
|
33
|
+
expect(root[0]).to be_instance_of(Literal)
|
34
|
+
expect(root[1]).to be_instance_of(Comment)
|
35
|
+
end
|
36
|
+
|
37
|
+
specify('parse single-line free space comments with spaces') do
|
38
|
+
regexp = /a # b/x
|
39
|
+
|
40
|
+
root = RP.parse(regexp)
|
41
|
+
expect(root.length).to eq 3
|
42
|
+
|
43
|
+
expect(root[0]).to be_instance_of(Literal)
|
44
|
+
expect(root[1]).to be_instance_of(WhiteSpace)
|
45
|
+
expect(root[2]).to be_instance_of(Comment)
|
46
|
+
end
|
47
|
+
|
27
48
|
specify('parse free space comments') do
|
28
49
|
regexp = /
|
29
50
|
a ? # One letter
|
30
51
|
b {2,5} # Another one
|
31
52
|
[c-g] + # A set
|
32
53
|
(h|i|j) | # A group
|
33
|
-
klm
|
54
|
+
klm#nospace before or after comment hash
|
34
55
|
nop +
|
35
56
|
/x
|
36
57
|
|
@@ -51,11 +72,11 @@ RSpec.describe('FreeSpace parsing') do
|
|
51
72
|
|
52
73
|
alt_2 = alt.alternatives.last
|
53
74
|
expect(alt_2).to be_instance_of(Alternative)
|
54
|
-
expect(alt_2.length).to eq
|
75
|
+
expect(alt_2.length).to eq 8
|
55
76
|
|
56
|
-
[0, 2,
|
77
|
+
[0, 2, 5, 7].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
|
57
78
|
|
58
|
-
expect(alt_2[
|
79
|
+
[1, 4].each { |i| expect(alt_2[i]).to be_instance_of(Comment) }
|
59
80
|
end
|
60
81
|
|
61
82
|
specify('parse free space nested comments') do
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to parse') do
|
4
|
+
it 'raises if if parsing from a Regexp and options are passed' do
|
5
|
+
expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
6
|
+
ArgumentError,
|
7
|
+
'options cannot be supplied unless parsing a String'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'sets options if parsing from a String' do
|
12
|
+
root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
|
13
|
+
|
14
|
+
expect(root.options).to eq(m: true, x: true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'allows options to not be supplied when parsing from a Regexp' do
|
18
|
+
root = RP.parse(/a+/ix)
|
19
|
+
|
20
|
+
expect(root.options).to eq(i: true, x: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has an empty option-hash when parsing from a String and passing no options' do
|
24
|
+
root = RP.parse('a+')
|
25
|
+
|
26
|
+
expect(root.options).to be_empty
|
27
|
+
end
|
28
|
+
end
|
@@ -13,6 +13,10 @@ RSpec.describe('Escape scanning') do
|
|
13
13
|
|
14
14
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
15
15
|
|
16
|
+
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
|
+
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
18
|
+
include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
|
19
|
+
|
16
20
|
include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
|
17
21
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
18
22
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to scan') do
|
4
|
+
def expect_type_tokens(tokens, type_tokens)
|
5
|
+
expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'raises if if scanning from a Regexp and options are passed' do
|
9
|
+
expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
10
|
+
ArgumentError,
|
11
|
+
'options cannot be supplied unless scanning a String'
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'sets free_spacing based on options if scanning from a String' do
|
16
|
+
expect_type_tokens(
|
17
|
+
RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
|
18
|
+
[
|
19
|
+
%i[literal literal],
|
20
|
+
%i[quantifier one_or_more],
|
21
|
+
%i[free_space comment]
|
22
|
+
]
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'does not set free_spacing if scanning from a String and passing no options' do
|
27
|
+
expect_type_tokens(
|
28
|
+
RS.scan('a+#c'),
|
29
|
+
[
|
30
|
+
%i[literal literal],
|
31
|
+
%i[quantifier one_or_more],
|
32
|
+
%i[literal literal]
|
33
|
+
]
|
34
|
+
)
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- spec/parser/free_space_spec.rb
|
125
125
|
- spec/parser/groups_spec.rb
|
126
126
|
- spec/parser/keep_spec.rb
|
127
|
+
- spec/parser/options_spec.rb
|
127
128
|
- spec/parser/posix_classes_spec.rb
|
128
129
|
- spec/parser/properties_spec.rb
|
129
130
|
- spec/parser/quantifiers_spec.rb
|
@@ -143,6 +144,7 @@ files:
|
|
143
144
|
- spec/scanner/keep_spec.rb
|
144
145
|
- spec/scanner/literals_spec.rb
|
145
146
|
- spec/scanner/meta_spec.rb
|
147
|
+
- spec/scanner/options_spec.rb
|
146
148
|
- spec/scanner/properties_spec.rb
|
147
149
|
- spec/scanner/quantifiers_spec.rb
|
148
150
|
- spec/scanner/refcalls_spec.rb
|
@@ -203,6 +205,7 @@ test_files:
|
|
203
205
|
- spec/parser/sets_spec.rb
|
204
206
|
- spec/parser/free_space_spec.rb
|
205
207
|
- spec/parser/keep_spec.rb
|
208
|
+
- spec/parser/options_spec.rb
|
206
209
|
- spec/parser/all_spec.rb
|
207
210
|
- spec/parser/conditionals_spec.rb
|
208
211
|
- spec/parser/types_spec.rb
|
@@ -246,6 +249,7 @@ test_files:
|
|
246
249
|
- spec/scanner/sets_spec.rb
|
247
250
|
- spec/scanner/free_space_spec.rb
|
248
251
|
- spec/scanner/keep_spec.rb
|
252
|
+
- spec/scanner/options_spec.rb
|
249
253
|
- spec/scanner/all_spec.rb
|
250
254
|
- spec/scanner/conditionals_spec.rb
|
251
255
|
- spec/scanner/types_spec.rb
|