regexp_parser 2.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +1 -4
- data/lib/regexp_parser/scanner.rb +998 -1095
- data/lib/regexp_parser/scanner/scanner.rl +24 -31
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/scanner/escapes_spec.rb +1 -0
- data/spec/scanner/groups_spec.rb +10 -1
- metadata +3 -3
@@ -3,6 +3,11 @@
|
|
3
3
|
include re_char_type "char_type.rl";
|
4
4
|
include re_property "property.rl";
|
5
5
|
|
6
|
+
utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
|
7
|
+
utf8_3_byte = (0xe0..0xef 0x80..0xbf 0x80..0xbf);
|
8
|
+
utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
9
|
+
utf8_multibyte = utf8_2_byte | utf8_3_byte | utf8_4_byte;
|
10
|
+
|
6
11
|
dot = '.';
|
7
12
|
backslash = '\\';
|
8
13
|
alternation = '|';
|
@@ -90,18 +95,19 @@
|
|
90
95
|
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
91
96
|
|
92
97
|
group_ref = [gk];
|
93
|
-
|
94
|
-
|
95
|
-
group_number = '-'? . [1-9] .
|
98
|
+
group_name_id_ab = ([^0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
|
99
|
+
group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
|
100
|
+
group_number = '-'? . [1-9] . [0-9]*;
|
96
101
|
group_level = [+\-] . [0-9]+;
|
97
102
|
|
98
|
-
group_name = ('<' .
|
103
|
+
group_name = ('<' . group_name_id_ab? . '>') |
|
104
|
+
("'" . group_name_id_sq? . "'");
|
99
105
|
group_lookup = group_name | group_number;
|
100
106
|
|
101
107
|
group_named = ('?' . group_name );
|
102
108
|
|
103
|
-
group_name_ref = group_ref . (('<' .
|
104
|
-
("'" .
|
109
|
+
group_name_ref = group_ref . (('<' . group_name_id_ab? . group_level? '>') |
|
110
|
+
("'" . group_name_id_sq? . group_level? "'"));
|
105
111
|
|
106
112
|
group_number_ref = group_ref . (('<' . group_number . group_level? '>') |
|
107
113
|
("'" . group_number . group_level? "'"));
|
@@ -123,10 +129,6 @@
|
|
123
129
|
ascii_print = ((0x20..0x7e) - meta_char - '#');
|
124
130
|
ascii_nonprint = (0x01..0x1f | 0x7f);
|
125
131
|
|
126
|
-
utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
|
127
|
-
utf8_3_byte = (0xe0..0xef 0x80..0xbf 0x80..0xbf);
|
128
|
-
utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
|
129
|
-
|
130
132
|
non_literal_escape = char_type_char | anchor_char | escaped_ascii |
|
131
133
|
keep_mark | [xucCM];
|
132
134
|
|
@@ -238,11 +240,7 @@
|
|
238
240
|
emit(:literal, :literal, copy(data, ts, te))
|
239
241
|
};
|
240
242
|
|
241
|
-
any
|
242
|
-
ascii_nonprint |
|
243
|
-
utf8_2_byte |
|
244
|
-
utf8_3_byte |
|
245
|
-
utf8_4_byte {
|
243
|
+
any | ascii_nonprint | utf8_multibyte {
|
246
244
|
text = copy(data, ts, te)
|
247
245
|
emit(:literal, :literal, text)
|
248
246
|
};
|
@@ -325,7 +323,7 @@
|
|
325
323
|
fret;
|
326
324
|
};
|
327
325
|
|
328
|
-
hex_sequence > (escaped_alpha, 5)
|
326
|
+
hex_sequence > (escaped_alpha, 5) @eof(premature_end_error) {
|
329
327
|
emit(:escape, :hex, copy(data, ts-1, te))
|
330
328
|
fret;
|
331
329
|
};
|
@@ -356,10 +354,7 @@
|
|
356
354
|
fcall unicode_property;
|
357
355
|
};
|
358
356
|
|
359
|
-
(any -- non_literal_escape) |
|
360
|
-
utf8_2_byte |
|
361
|
-
utf8_3_byte |
|
362
|
-
utf8_4_byte > (escaped_alpha, 1) {
|
357
|
+
(any -- non_literal_escape) | utf8_multibyte > (escaped_alpha, 1) {
|
363
358
|
emit(:escape, :literal, copy(data, ts-1, te))
|
364
359
|
fret;
|
365
360
|
};
|
@@ -511,10 +506,10 @@
|
|
511
506
|
when /^\(\?(?:<>|'')/
|
512
507
|
validation_error(:group, 'named group', 'name is empty')
|
513
508
|
|
514
|
-
when /^\(
|
509
|
+
when /^\(\?<[^>]+>/
|
515
510
|
emit(:group, :named_ab, text)
|
516
511
|
|
517
|
-
when /^\(\?'
|
512
|
+
when /^\(\?'[^']+'/
|
518
513
|
emit(:group, :named_sq, text)
|
519
514
|
|
520
515
|
end
|
@@ -548,14 +543,16 @@
|
|
548
543
|
when /^\\([gk])(<>|'')/ # angle brackets
|
549
544
|
validation_error(:backref, 'ref/call', 'ref ID is empty')
|
550
545
|
|
551
|
-
|
546
|
+
# TODO: finer quirks of choosing recursive or non-recursive refs/calls.
|
547
|
+
# e.g.: `a-1` is a valid group id: 'aa'[/(?<a-1>a)\g<a-1>/] # => 'aa'
|
548
|
+
when /^\\([gk])<[^\p{digit}+\->][^>+\-]*>/ # angle-brackets
|
552
549
|
if $1 == 'k'
|
553
550
|
emit(:backref, :name_ref_ab, text)
|
554
551
|
else
|
555
552
|
emit(:backref, :name_call_ab, text)
|
556
553
|
end
|
557
554
|
|
558
|
-
when /^\\([gk])'[^\
|
555
|
+
when /^\\([gk])'[^\p{digit}+\-'][^'+\-]*'/ # single quotes
|
559
556
|
if $1 == 'k'
|
560
557
|
emit(:backref, :name_ref_sq, text)
|
561
558
|
else
|
@@ -590,10 +587,10 @@
|
|
590
587
|
emit(:backref, :number_rel_call_sq, text)
|
591
588
|
end
|
592
589
|
|
593
|
-
when /^\\k<[^\
|
590
|
+
when /^\\k<[^\p{digit}+\->][^>]*[+\-]\d+>/ # angle-brackets
|
594
591
|
emit(:backref, :name_recursion_ref_ab, text)
|
595
592
|
|
596
|
-
when /^\\k'[^\
|
593
|
+
when /^\\k'[^\p{digit}+\-'][^']*[+\-]\d+'/ # single-quotes
|
597
594
|
emit(:backref, :name_recursion_ref_sq, text)
|
598
595
|
|
599
596
|
when /^\\([gk])<[+\-]?\d+[+\-]\d+>/ # angle-brackets
|
@@ -668,11 +665,7 @@
|
|
668
665
|
# Literal: any run of ASCII (printable or non-printable), and/or UTF-8,
|
669
666
|
# except meta characters.
|
670
667
|
# ------------------------------------------------------------------------
|
671
|
-
(ascii_print -- space)+
|
672
|
-
ascii_nonprint+ |
|
673
|
-
utf8_2_byte+ |
|
674
|
-
utf8_3_byte+ |
|
675
|
-
utf8_4_byte+ {
|
668
|
+
(ascii_print -- space)+ | ascii_nonprint+ | utf8_multibyte+ {
|
676
669
|
append_literal(data, ts, te)
|
677
670
|
};
|
678
671
|
|
@@ -27,6 +27,7 @@ RSpec.describe('Escape scanning') do
|
|
27
27
|
include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
|
28
28
|
include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
|
29
29
|
|
30
|
+
include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
|
30
31
|
include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
|
31
32
|
include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
|
32
33
|
|
data/spec/scanner/groups_spec.rb
CHANGED
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
|
|
5
5
|
include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
|
6
6
|
include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
|
7
7
|
|
8
|
+
# Named groups
|
9
|
+
# only names that start with a hyphen or digit (ascii or other) are invalid
|
8
10
|
include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
|
9
11
|
include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
|
10
|
-
|
11
12
|
include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
|
12
13
|
include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
|
14
|
+
include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
|
15
|
+
include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
|
16
|
+
include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
|
17
|
+
include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
|
18
|
+
include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
|
19
|
+
include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
|
20
|
+
include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
|
21
|
+
include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
|
13
22
|
|
14
23
|
include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
|
15
24
|
include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
- !ruby/object:Gem::Version
|
184
184
|
version: '0'
|
185
185
|
requirements: []
|
186
|
-
rubygems_version: 3.2.0
|
186
|
+
rubygems_version: 3.2.0
|
187
187
|
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Scanner, lexer, parser for ruby's regular expressions
|