regexp_parser 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +8 -0
- data/README.md +6 -5
- data/lib/regexp_parser/expression/classes/type.rb +11 -9
- data/lib/regexp_parser/parser.rb +10 -2
- data/lib/regexp_parser/scanner/property.rl +1 -2
- data/lib/regexp_parser/scanner/scanner.rl +6 -2
- data/lib/regexp_parser/scanner.rb +1098 -969
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -0
- data/lib/regexp_parser/syntax/tokens/character_set.rb +5 -2
- data/lib/regexp_parser/syntax/tokens/character_type.rb +3 -1
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/test/parser/test_escapes.rb +24 -0
- data/test/parser/test_properties.rb +12 -0
- data/test/parser/test_sets.rb +10 -0
- data/test/parser/test_types.rb +18 -0
- data/test/scanner/test_errors.rb +5 -0
- data/test/scanner/test_escapes.rb +3 -0
- data/test/scanner/test_properties.rb +8 -0
- data/test/scanner/test_sets.rb +7 -0
- data/test/scanner/test_types.rb +3 -0
- data/test/syntax/ruby/test_1.9.1.rb +2 -1
- metadata +3 -3
@@ -15,7 +15,7 @@ module Regexp::Syntax
|
|
15
15
|
implements :backref, Backreference::All +
|
16
16
|
SubexpressionCall::All
|
17
17
|
|
18
|
-
implements :escape, Escape::Unicode + Escape::Hex
|
18
|
+
implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
|
19
19
|
|
20
20
|
implements :type, CharacterType::Hex
|
21
21
|
|
@@ -12,6 +12,10 @@ module Regexp::Syntax
|
|
12
12
|
implements :conditional, Conditional::All
|
13
13
|
implements :property, UnicodeProperty::V200
|
14
14
|
implements :nonproperty, UnicodeProperty::V200
|
15
|
+
|
16
|
+
implements :type, CharacterType::Clustered
|
17
|
+
implements :set, CharacterSet::Clustered
|
18
|
+
implements :subset, CharacterSet::Clustered
|
15
19
|
end
|
16
20
|
end
|
17
21
|
|
@@ -5,11 +5,14 @@ module Regexp::Syntax
|
|
5
5
|
OpenClose = [:open, :close]
|
6
6
|
|
7
7
|
Basic = [:negate, :member, :range]
|
8
|
-
Extended = Basic + [:escape, :intersection, :backspace]
|
8
|
+
Extended = Basic + [:escape, :intersection, :backspace,
|
9
|
+
:member_hex, :range_hex]
|
9
10
|
|
10
11
|
Types = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
|
11
12
|
:type_space, :type_nonspace, :type_word, :type_nonword]
|
12
13
|
|
14
|
+
Clustered = [:type_linebreak, :type_xgrapheme]
|
15
|
+
|
13
16
|
module POSIX
|
14
17
|
Standard = [
|
15
18
|
:class_alnum, :class_alpha, :class_blank, :class_cntrl,
|
@@ -30,7 +33,7 @@ module Regexp::Syntax
|
|
30
33
|
All = Standard + StandardNegative + Extensions + ExtensionsNegative
|
31
34
|
end
|
32
35
|
|
33
|
-
All = Basic + Extended + Types + POSIX::All
|
36
|
+
All = Basic + Extended + Types + Clustered + POSIX::All
|
34
37
|
Type = :set
|
35
38
|
|
36
39
|
module SubSet
|
data/test/parser/test_escapes.rb
CHANGED
@@ -32,6 +32,9 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
32
32
|
|
33
33
|
# hex escapes
|
34
34
|
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Literal],
|
35
|
+
|
36
|
+
# octal escapes
|
37
|
+
/a\177/n => [1, :escape, :octal, EscapeSequence::Literal],
|
35
38
|
}
|
36
39
|
|
37
40
|
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
@@ -75,4 +78,25 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
75
78
|
assert_equal '\\M-\\C-X', root[2].text
|
76
79
|
end
|
77
80
|
|
81
|
+
def test_parse_lower_c_meta_control_sequence
|
82
|
+
root = RP.parse(/\A\\\M-\cX/n)
|
83
|
+
|
84
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
85
|
+
assert_equal '\\M-\\cX', root[2].text
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_parse_escape_reverse_meta_control_sequence
|
89
|
+
root = RP.parse(/\A\\\C-\M-X/n)
|
90
|
+
|
91
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
92
|
+
assert_equal '\\C-\\M-X', root[2].text
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
96
|
+
root = RP.parse(/\A\\\c\M-X/n)
|
97
|
+
|
98
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
99
|
+
assert_equal '\\c\\M-X', root[2].text
|
100
|
+
end
|
101
|
+
|
78
102
|
end
|
@@ -318,6 +318,18 @@ class ParserProperties < Test::Unit::TestCase
|
|
318
318
|
assert_equal true, t.expressions[1].negative?
|
319
319
|
end
|
320
320
|
|
321
|
+
def test_parse_caret_nonproperty_negative
|
322
|
+
t = RP.parse 'ab\p{^L}cd', 'ruby/1.9'
|
323
|
+
|
324
|
+
assert_equal true, t.expressions[1].negative?
|
325
|
+
end
|
326
|
+
|
327
|
+
def test_parse_double_negated_property_negative
|
328
|
+
t = RP.parse 'ab\P{^L}cd', 'ruby/1.9'
|
329
|
+
|
330
|
+
assert_equal false, t.expressions[1].negative?
|
331
|
+
end
|
332
|
+
|
321
333
|
def test_parse_property_age
|
322
334
|
t = RP.parse 'ab\p{age=5.2}cd', 'ruby/1.9'
|
323
335
|
|
data/test/parser/test_sets.rb
CHANGED
@@ -39,6 +39,16 @@ class TestParserSets < Test::Unit::TestCase
|
|
39
39
|
assert_equal false, exp.include?(']')
|
40
40
|
end
|
41
41
|
|
42
|
+
def test_parse_hex_members
|
43
|
+
root = RP.parse('[\x20\x24-\x26\x28]', :any)
|
44
|
+
exp = root.expressions.at(0)
|
45
|
+
|
46
|
+
assert_equal true, exp.include?('\x20')
|
47
|
+
assert_equal true, exp.include?('\x24-\x26')
|
48
|
+
assert_equal true, exp.include?('\x28')
|
49
|
+
assert_equal false, exp.include?(']')
|
50
|
+
end
|
51
|
+
|
42
52
|
def test_parse_chat_type_set_members
|
43
53
|
root = RP.parse('[\da-z]', :any)
|
44
54
|
exp = root.expressions.at(0)
|
data/test/parser/test_types.rb
CHANGED
@@ -29,4 +29,22 @@ class TestParserTypes < Test::Unit::TestCase
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
+
tests_2_0 = {
|
33
|
+
'a\Rc' => [1, :type, :linebreak, CharacterType::Linebreak],
|
34
|
+
'a\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme],
|
35
|
+
}
|
36
|
+
|
37
|
+
tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
|
38
|
+
define_method "test_parse_type_#{token}_#{count}" do
|
39
|
+
root = RP.parse(pattern, 'ruby/2.0')
|
40
|
+
exp = root.expressions.at(index)
|
41
|
+
|
42
|
+
assert exp.is_a?( klass ),
|
43
|
+
"Expected #{klass}, but got #{exp.class.name}"
|
44
|
+
|
45
|
+
assert_equal type, exp.type
|
46
|
+
assert_equal token, exp.token
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
32
50
|
end
|
data/test/scanner/test_errors.rb
CHANGED
@@ -72,14 +72,19 @@ class ScannerErrors < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
def test_scanner_eof_in_control_sequence
|
74
74
|
assert_raise( RS::PrematureEndError ) { RS.scan('\c') }
|
75
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\c\M') }
|
76
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\c\M-') }
|
75
77
|
assert_raise( RS::PrematureEndError ) { RS.scan('\C') }
|
76
78
|
assert_raise( RS::PrematureEndError ) { RS.scan('\C-') }
|
79
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M') }
|
80
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M-') }
|
77
81
|
end
|
78
82
|
|
79
83
|
def test_scanner_eof_in_meta_sequence
|
80
84
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M') }
|
81
85
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-') }
|
82
86
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\\') }
|
87
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\c') }
|
83
88
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C') }
|
84
89
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C-') }
|
85
90
|
end
|
@@ -29,9 +29,12 @@ class ScannerEscapes < Test::Unit::TestCase
|
|
29
29
|
|
30
30
|
/a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
|
31
31
|
/a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
|
32
|
+
/a\c\M-Bc/n => [1, :escape, :control, '\c\M-B', 1, 7],
|
33
|
+
/a\C-\M-Bc/n => [1, :escape, :control, '\C-\M-B', 1, 8],
|
32
34
|
|
33
35
|
/a\M-Bc/n => [1, :escape, :meta_sequence, '\M-B', 1, 5],
|
34
36
|
/a\M-\C-Bc/n => [1, :escape, :meta_sequence, '\M-\C-B', 1, 8],
|
37
|
+
/a\M-\cBc/n => [1, :escape, :meta_sequence, '\M-\cB', 1, 7],
|
35
38
|
|
36
39
|
'ab\\\xcd' => [1, :escape, :backslash, '\\\\', 2, 4],
|
37
40
|
'ab\\\0cd' => [1, :escape, :backslash, '\\\\', 2, 4],
|
@@ -317,5 +317,13 @@ class ScannerProperties < Test::Unit::TestCase
|
|
317
317
|
assert_equal :nonproperty, result[0]
|
318
318
|
assert_equal token, result[1]
|
319
319
|
end
|
320
|
+
|
321
|
+
define_method "test_scan_double_negated_property_#{token}_#{count}" do
|
322
|
+
tokens = RS.scan("a\\P{^#{property}}c")
|
323
|
+
result = tokens.at(1)
|
324
|
+
|
325
|
+
assert_equal :property, result[0]
|
326
|
+
assert_equal token, result[1]
|
327
|
+
end
|
320
328
|
end
|
321
329
|
end
|
data/test/scanner/test_sets.rb
CHANGED
@@ -20,6 +20,8 @@ class ScannerSets < Test::Unit::TestCase
|
|
20
20
|
'[<]' => [1, :set, :member, '<', 1, 2],
|
21
21
|
'[>]' => [1, :set, :member, '>', 1, 2],
|
22
22
|
|
23
|
+
'[\x20]' => [1, :set, :member_hex, '\x20', 1, 5],
|
24
|
+
|
23
25
|
'[\.]' => [1, :set, :escape, '\.', 1, 3],
|
24
26
|
'[\!]' => [1, :set, :escape, '\!', 1, 3],
|
25
27
|
'[\#]' => [1, :set, :escape, '\#', 1, 3],
|
@@ -40,6 +42,9 @@ class ScannerSets < Test::Unit::TestCase
|
|
40
42
|
'[\w]' => [1, :set, :type_word, '\w', 1, 3],
|
41
43
|
'[\W]' => [1, :set, :type_nonword, '\W', 1, 3],
|
42
44
|
|
45
|
+
'[\R]' => [1, :set, :type_linebreak, '\R', 1, 3],
|
46
|
+
'[\X]' => [1, :set, :type_xgrapheme, '\X', 1, 3],
|
47
|
+
|
43
48
|
'[a-c]' => [1, :set, :range, 'a-c', 1, 4],
|
44
49
|
'[a-c-]' => [2, :set, :member, '-', 4, 6],
|
45
50
|
'[a-c^]' => [2, :set, :member, '^', 4, 5],
|
@@ -58,6 +63,8 @@ class ScannerSets < Test::Unit::TestCase
|
|
58
63
|
|
59
64
|
'[a\p{digit}c]' => [2, :set, :digit, '\p{digit}', 2, 11],
|
60
65
|
'[a\P{digit}c]' => [2, :set, :digit, '\P{digit}', 2, 11],
|
66
|
+
'[a\p{^digit}c]' => [2, :set, :digit, '\p{^digit}', 2, 12],
|
67
|
+
'[a\P{^digit}c]' => [2, :set, :digit, '\P{^digit}', 2, 12],
|
61
68
|
|
62
69
|
'[a\p{ALPHA}c]' => [2, :set, :alpha, '\p{ALPHA}', 2, 11],
|
63
70
|
'[a\p{P}c]' => [2, :set, :punct_any, '\p{P}', 2, 7],
|
data/test/scanner/test_types.rb
CHANGED
@@ -14,6 +14,9 @@ class ScannerTypes < Test::Unit::TestCase
|
|
14
14
|
|
15
15
|
'a\wc' => [1, :type, :word, '\w', 1, 3],
|
16
16
|
'a\Wc' => [1, :type, :nonword, '\W', 1, 3],
|
17
|
+
|
18
|
+
'a\Rc' => [1, :type, :linebreak, '\R', 1, 3],
|
19
|
+
'a\Xc' => [1, :type, :xgrapheme, '\X', 1, 3],
|
17
20
|
}
|
18
21
|
|
19
22
|
tests.each do |(pattern, (index, type, token, text, ts, te))|
|
@@ -10,7 +10,8 @@ class TestSyntaxRuby_V191 < Test::Unit::TestCase
|
|
10
10
|
tests = {
|
11
11
|
:implements => {
|
12
12
|
:escape => [
|
13
|
-
Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode
|
13
|
+
Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode +
|
14
|
+
Escape::Hex + Escape::Octal
|
14
15
|
].flatten,
|
15
16
|
|
16
17
|
:type => [
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
197
|
version: '0'
|
198
198
|
requirements: []
|
199
199
|
rubyforge_project:
|
200
|
-
rubygems_version: 2.6.
|
200
|
+
rubygems_version: 2.6.11
|
201
201
|
signing_key:
|
202
202
|
specification_version: 4
|
203
203
|
summary: Scanner, lexer, parser for ruby's regular expressions
|