regexp_parser 1.7.1 → 1.8.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -0
- data/README.md +12 -1
- data/lib/regexp_parser/lexer.rb +4 -4
- data/lib/regexp_parser/parser.rb +18 -12
- data/lib/regexp_parser/scanner.rb +1002 -1008
- data/lib/regexp_parser/scanner/scanner.rl +20 -13
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/parser/free_space_spec.rb +25 -4
- data/spec/parser/options_spec.rb +28 -0
- data/spec/scanner/escapes_spec.rb +4 -0
- data/spec/scanner/options_spec.rb +36 -0
- metadata +6 -2
@@ -21,7 +21,7 @@
|
|
21
21
|
set_close = ']';
|
22
22
|
brackets = set_open | set_close;
|
23
23
|
|
24
|
-
comment = ('#' . [^\n]* . '\n');
|
24
|
+
comment = ('#' . [^\n]* . '\n'?);
|
25
25
|
|
26
26
|
class_name_posix = 'alnum' | 'alpha' | 'blank' |
|
27
27
|
'cntrl' | 'digit' | 'graph' |
|
@@ -120,7 +120,7 @@
|
|
120
120
|
|
121
121
|
literal_delimiters = ']' | '}';
|
122
122
|
|
123
|
-
ascii_print = ((0x20..0x7e) - meta_char);
|
123
|
+
ascii_print = ((0x20..0x7e) - meta_char - '#');
|
124
124
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
125
125
|
|
126
126
|
utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
|
@@ -128,7 +128,7 @@
|
|
128
128
|
utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
129
129
|
|
130
130
|
non_literal_escape = char_type_char | anchor_char | escaped_ascii |
|
131
|
-
group_ref | keep_mark | [xucCM];
|
131
|
+
keep_mark | [xucCM];
|
132
132
|
|
133
133
|
non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
|
134
134
|
multi_codepoint_char_type | [0-9cCM];
|
@@ -737,21 +737,16 @@ class Regexp::Scanner
|
|
737
737
|
#
|
738
738
|
# This method may raise errors if a syntax error is encountered.
|
739
739
|
# --------------------------------------------------------------------------
|
740
|
-
def self.scan(input_object, &block)
|
741
|
-
new.scan(input_object, &block)
|
740
|
+
def self.scan(input_object, options: nil, &block)
|
741
|
+
new.scan(input_object, options: options, &block)
|
742
742
|
end
|
743
743
|
|
744
|
-
def scan(input_object, &block)
|
744
|
+
def scan(input_object, options: nil, &block)
|
745
745
|
self.literal = nil
|
746
746
|
stack = []
|
747
747
|
|
748
|
-
if input_object.is_a?(Regexp)
|
749
|
-
input = input_object.source
|
750
|
-
self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
|
751
|
-
else
|
752
|
-
input = input_object
|
753
|
-
self.free_spacing = false
|
754
|
-
end
|
748
|
+
input = input_object.is_a?(Regexp) ? input_object.source : input_object
|
749
|
+
self.free_spacing = free_spacing?(input_object, options)
|
755
750
|
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
756
751
|
|
757
752
|
data = input.unpack("c*") if input.is_a?(String)
|
@@ -817,6 +812,18 @@ class Regexp::Scanner
|
|
817
812
|
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
|
818
813
|
:group_depth, :set_depth, :conditional_stack
|
819
814
|
|
815
|
+
def free_spacing?(input_object, options)
|
816
|
+
if options && !input_object.is_a?(String)
|
817
|
+
raise ArgumentError, 'options cannot be supplied unless scanning a String'
|
818
|
+
end
|
819
|
+
|
820
|
+
options = input_object.options if input_object.is_a?(::Regexp)
|
821
|
+
|
822
|
+
return false unless options
|
823
|
+
|
824
|
+
options & Regexp::EXTENDED != 0
|
825
|
+
end
|
826
|
+
|
820
827
|
def in_group?
|
821
828
|
group_depth > 0
|
822
829
|
end
|
@@ -24,13 +24,34 @@ RSpec.describe('FreeSpace parsing') do
|
|
24
24
|
expect(root.first.text).to eq 'a b c d'
|
25
25
|
end
|
26
26
|
|
27
|
+
specify('parse single-line free space comments without spaces') do
|
28
|
+
regexp = /a#b/x
|
29
|
+
|
30
|
+
root = RP.parse(regexp)
|
31
|
+
expect(root.length).to eq 2
|
32
|
+
|
33
|
+
expect(root[0]).to be_instance_of(Literal)
|
34
|
+
expect(root[1]).to be_instance_of(Comment)
|
35
|
+
end
|
36
|
+
|
37
|
+
specify('parse single-line free space comments with spaces') do
|
38
|
+
regexp = /a # b/x
|
39
|
+
|
40
|
+
root = RP.parse(regexp)
|
41
|
+
expect(root.length).to eq 3
|
42
|
+
|
43
|
+
expect(root[0]).to be_instance_of(Literal)
|
44
|
+
expect(root[1]).to be_instance_of(WhiteSpace)
|
45
|
+
expect(root[2]).to be_instance_of(Comment)
|
46
|
+
end
|
47
|
+
|
27
48
|
specify('parse free space comments') do
|
28
49
|
regexp = /
|
29
50
|
a ? # One letter
|
30
51
|
b {2,5} # Another one
|
31
52
|
[c-g] + # A set
|
32
53
|
(h|i|j) | # A group
|
33
|
-
klm
|
54
|
+
klm#nospace before or after comment hash
|
34
55
|
nop +
|
35
56
|
/x
|
36
57
|
|
@@ -51,11 +72,11 @@ RSpec.describe('FreeSpace parsing') do
|
|
51
72
|
|
52
73
|
alt_2 = alt.alternatives.last
|
53
74
|
expect(alt_2).to be_instance_of(Alternative)
|
54
|
-
expect(alt_2.length).to eq 7
|
75
|
+
expect(alt_2.length).to eq 8
|
55
76
|
|
56
|
-
[0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
|
77
|
+
[0, 2, 5, 7].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
|
57
78
|
|
58
|
-
expect(alt_2[1]).to be_instance_of(Comment)
|
79
|
+
[1, 4].each { |i| expect(alt_2[i]).to be_instance_of(Comment) }
|
59
80
|
end
|
60
81
|
|
61
82
|
specify('parse free space nested comments') do
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to parse') do
|
4
|
+
it 'raises if if parsing from a Regexp and options are passed' do
|
5
|
+
expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
6
|
+
ArgumentError,
|
7
|
+
'options cannot be supplied unless parsing a String'
|
8
|
+
)
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'sets options if parsing from a String' do
|
12
|
+
root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
|
13
|
+
|
14
|
+
expect(root.options).to eq(m: true, x: true)
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'allows options to not be supplied when parsing from a Regexp' do
|
18
|
+
root = RP.parse(/a+/ix)
|
19
|
+
|
20
|
+
expect(root.options).to eq(i: true, x: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has an empty option-hash when parsing from a String and passing no options' do
|
24
|
+
root = RP.parse('a+')
|
25
|
+
|
26
|
+
expect(root.options).to be_empty
|
27
|
+
end
|
28
|
+
end
|
@@ -13,6 +13,10 @@ RSpec.describe('Escape scanning') do
|
|
13
13
|
|
14
14
|
include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
|
15
15
|
|
16
|
+
# these incomplete ref/call sequences are treated as literal escapes by Ruby
|
17
|
+
include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
|
18
|
+
include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
|
19
|
+
|
16
20
|
include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
|
17
21
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
18
22
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
RSpec.describe('passing options to scan') do
|
4
|
+
def expect_type_tokens(tokens, type_tokens)
|
5
|
+
expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
|
6
|
+
end
|
7
|
+
|
8
|
+
it 'raises if if scanning from a Regexp and options are passed' do
|
9
|
+
expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
|
10
|
+
ArgumentError,
|
11
|
+
'options cannot be supplied unless scanning a String'
|
12
|
+
)
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'sets free_spacing based on options if scanning from a String' do
|
16
|
+
expect_type_tokens(
|
17
|
+
RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
|
18
|
+
[
|
19
|
+
%i[literal literal],
|
20
|
+
%i[quantifier one_or_more],
|
21
|
+
%i[free_space comment]
|
22
|
+
]
|
23
|
+
)
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'does not set free_spacing if scanning from a String and passing no options' do
|
27
|
+
expect_type_tokens(
|
28
|
+
RS.scan('a+#c'),
|
29
|
+
[
|
30
|
+
%i[literal literal],
|
31
|
+
%i[quantifier one_or_more],
|
32
|
+
%i[literal literal]
|
33
|
+
]
|
34
|
+
)
|
35
|
+
end
|
36
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -124,6 +124,7 @@ files:
|
|
124
124
|
- spec/parser/free_space_spec.rb
|
125
125
|
- spec/parser/groups_spec.rb
|
126
126
|
- spec/parser/keep_spec.rb
|
127
|
+
- spec/parser/options_spec.rb
|
127
128
|
- spec/parser/posix_classes_spec.rb
|
128
129
|
- spec/parser/properties_spec.rb
|
129
130
|
- spec/parser/quantifiers_spec.rb
|
@@ -143,6 +144,7 @@ files:
|
|
143
144
|
- spec/scanner/keep_spec.rb
|
144
145
|
- spec/scanner/literals_spec.rb
|
145
146
|
- spec/scanner/meta_spec.rb
|
147
|
+
- spec/scanner/options_spec.rb
|
146
148
|
- spec/scanner/properties_spec.rb
|
147
149
|
- spec/scanner/quantifiers_spec.rb
|
148
150
|
- spec/scanner/refcalls_spec.rb
|
@@ -203,6 +205,7 @@ test_files:
|
|
203
205
|
- spec/parser/sets_spec.rb
|
204
206
|
- spec/parser/free_space_spec.rb
|
205
207
|
- spec/parser/keep_spec.rb
|
208
|
+
- spec/parser/options_spec.rb
|
206
209
|
- spec/parser/all_spec.rb
|
207
210
|
- spec/parser/conditionals_spec.rb
|
208
211
|
- spec/parser/types_spec.rb
|
@@ -246,6 +249,7 @@ test_files:
|
|
246
249
|
- spec/scanner/sets_spec.rb
|
247
250
|
- spec/scanner/free_space_spec.rb
|
248
251
|
- spec/scanner/keep_spec.rb
|
252
|
+
- spec/scanner/options_spec.rb
|
249
253
|
- spec/scanner/all_spec.rb
|
250
254
|
- spec/scanner/conditionals_spec.rb
|
251
255
|
- spec/scanner/types_spec.rb
|