regexp_parser 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +8 -0
- data/README.md +6 -5
- data/lib/regexp_parser/expression/classes/type.rb +11 -9
- data/lib/regexp_parser/parser.rb +10 -2
- data/lib/regexp_parser/scanner/property.rl +1 -2
- data/lib/regexp_parser/scanner/scanner.rl +6 -2
- data/lib/regexp_parser/scanner.rb +1098 -969
- data/lib/regexp_parser/syntax/ruby/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/ruby/2.0.0.rb +4 -0
- data/lib/regexp_parser/syntax/tokens/character_set.rb +5 -2
- data/lib/regexp_parser/syntax/tokens/character_type.rb +3 -1
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -0
- data/lib/regexp_parser/version.rb +1 -1
- data/test/parser/test_escapes.rb +24 -0
- data/test/parser/test_properties.rb +12 -0
- data/test/parser/test_sets.rb +10 -0
- data/test/parser/test_types.rb +18 -0
- data/test/scanner/test_errors.rb +5 -0
- data/test/scanner/test_escapes.rb +3 -0
- data/test/scanner/test_properties.rb +8 -0
- data/test/scanner/test_sets.rb +7 -0
- data/test/scanner/test_types.rb +3 -0
- data/test/syntax/ruby/test_1.9.1.rb +2 -1
- metadata +3 -3
@@ -15,7 +15,7 @@ module Regexp::Syntax
|
|
15
15
|
implements :backref, Backreference::All +
|
16
16
|
SubexpressionCall::All
|
17
17
|
|
18
|
-
implements :escape, Escape::Unicode + Escape::Hex
|
18
|
+
implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
|
19
19
|
|
20
20
|
implements :type, CharacterType::Hex
|
21
21
|
|
@@ -12,6 +12,10 @@ module Regexp::Syntax
|
|
12
12
|
implements :conditional, Conditional::All
|
13
13
|
implements :property, UnicodeProperty::V200
|
14
14
|
implements :nonproperty, UnicodeProperty::V200
|
15
|
+
|
16
|
+
implements :type, CharacterType::Clustered
|
17
|
+
implements :set, CharacterSet::Clustered
|
18
|
+
implements :subset, CharacterSet::Clustered
|
15
19
|
end
|
16
20
|
end
|
17
21
|
|
@@ -5,11 +5,14 @@ module Regexp::Syntax
|
|
5
5
|
OpenClose = [:open, :close]
|
6
6
|
|
7
7
|
Basic = [:negate, :member, :range]
|
8
|
-
Extended = Basic + [:escape, :intersection, :backspace]
|
8
|
+
Extended = Basic + [:escape, :intersection, :backspace,
|
9
|
+
:member_hex, :range_hex]
|
9
10
|
|
10
11
|
Types = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
|
11
12
|
:type_space, :type_nonspace, :type_word, :type_nonword]
|
12
13
|
|
14
|
+
Clustered = [:type_linebreak, :type_xgrapheme]
|
15
|
+
|
13
16
|
module POSIX
|
14
17
|
Standard = [
|
15
18
|
:class_alnum, :class_alpha, :class_blank, :class_cntrl,
|
@@ -30,7 +33,7 @@ module Regexp::Syntax
|
|
30
33
|
All = Standard + StandardNegative + Extensions + ExtensionsNegative
|
31
34
|
end
|
32
35
|
|
33
|
-
All = Basic + Extended + Types + POSIX::All
|
36
|
+
All = Basic + Extended + Types + Clustered + POSIX::All
|
34
37
|
Type = :set
|
35
38
|
|
36
39
|
module SubSet
|
data/test/parser/test_escapes.rb
CHANGED
@@ -32,6 +32,9 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
32
32
|
|
33
33
|
# hex escapes
|
34
34
|
/a\xFF/n => [1, :escape, :hex, EscapeSequence::Literal],
|
35
|
+
|
36
|
+
# octal escapes
|
37
|
+
/a\177/n => [1, :escape, :octal, EscapeSequence::Literal],
|
35
38
|
}
|
36
39
|
|
37
40
|
tests.each_with_index do |(pattern, (index, type, token, klass)), count|
|
@@ -75,4 +78,25 @@ class TestParserEscapes < Test::Unit::TestCase
|
|
75
78
|
assert_equal '\\M-\\C-X', root[2].text
|
76
79
|
end
|
77
80
|
|
81
|
+
def test_parse_lower_c_meta_control_sequence
|
82
|
+
root = RP.parse(/\A\\\M-\cX/n)
|
83
|
+
|
84
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
85
|
+
assert_equal '\\M-\\cX', root[2].text
|
86
|
+
end
|
87
|
+
|
88
|
+
def test_parse_escape_reverse_meta_control_sequence
|
89
|
+
root = RP.parse(/\A\\\C-\M-X/n)
|
90
|
+
|
91
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
92
|
+
assert_equal '\\C-\\M-X', root[2].text
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_parse_escape_reverse_lower_c_meta_control_sequence
|
96
|
+
root = RP.parse(/\A\\\c\M-X/n)
|
97
|
+
|
98
|
+
assert_equal EscapeSequence::MetaControl, root[2].class
|
99
|
+
assert_equal '\\c\\M-X', root[2].text
|
100
|
+
end
|
101
|
+
|
78
102
|
end
|
@@ -318,6 +318,18 @@ class ParserProperties < Test::Unit::TestCase
|
|
318
318
|
assert_equal true, t.expressions[1].negative?
|
319
319
|
end
|
320
320
|
|
321
|
+
def test_parse_caret_nonproperty_negative
|
322
|
+
t = RP.parse 'ab\p{^L}cd', 'ruby/1.9'
|
323
|
+
|
324
|
+
assert_equal true, t.expressions[1].negative?
|
325
|
+
end
|
326
|
+
|
327
|
+
def test_parse_double_negated_property_negative
|
328
|
+
t = RP.parse 'ab\P{^L}cd', 'ruby/1.9'
|
329
|
+
|
330
|
+
assert_equal false, t.expressions[1].negative?
|
331
|
+
end
|
332
|
+
|
321
333
|
def test_parse_property_age
|
322
334
|
t = RP.parse 'ab\p{age=5.2}cd', 'ruby/1.9'
|
323
335
|
|
data/test/parser/test_sets.rb
CHANGED
@@ -39,6 +39,16 @@ class TestParserSets < Test::Unit::TestCase
|
|
39
39
|
assert_equal false, exp.include?(']')
|
40
40
|
end
|
41
41
|
|
42
|
+
def test_parse_hex_members
|
43
|
+
root = RP.parse('[\x20\x24-\x26\x28]', :any)
|
44
|
+
exp = root.expressions.at(0)
|
45
|
+
|
46
|
+
assert_equal true, exp.include?('\x20')
|
47
|
+
assert_equal true, exp.include?('\x24-\x26')
|
48
|
+
assert_equal true, exp.include?('\x28')
|
49
|
+
assert_equal false, exp.include?(']')
|
50
|
+
end
|
51
|
+
|
42
52
|
def test_parse_chat_type_set_members
|
43
53
|
root = RP.parse('[\da-z]', :any)
|
44
54
|
exp = root.expressions.at(0)
|
data/test/parser/test_types.rb
CHANGED
@@ -29,4 +29,22 @@ class TestParserTypes < Test::Unit::TestCase
|
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
|
+
tests_2_0 = {
|
33
|
+
'a\Rc' => [1, :type, :linebreak, CharacterType::Linebreak],
|
34
|
+
'a\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme],
|
35
|
+
}
|
36
|
+
|
37
|
+
tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
|
38
|
+
define_method "test_parse_type_#{token}_#{count}" do
|
39
|
+
root = RP.parse(pattern, 'ruby/2.0')
|
40
|
+
exp = root.expressions.at(index)
|
41
|
+
|
42
|
+
assert exp.is_a?( klass ),
|
43
|
+
"Expected #{klass}, but got #{exp.class.name}"
|
44
|
+
|
45
|
+
assert_equal type, exp.type
|
46
|
+
assert_equal token, exp.token
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
32
50
|
end
|
data/test/scanner/test_errors.rb
CHANGED
@@ -72,14 +72,19 @@ class ScannerErrors < Test::Unit::TestCase
|
|
72
72
|
|
73
73
|
def test_scanner_eof_in_control_sequence
|
74
74
|
assert_raise( RS::PrematureEndError ) { RS.scan('\c') }
|
75
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\c\M') }
|
76
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\c\M-') }
|
75
77
|
assert_raise( RS::PrematureEndError ) { RS.scan('\C') }
|
76
78
|
assert_raise( RS::PrematureEndError ) { RS.scan('\C-') }
|
79
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M') }
|
80
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M-') }
|
77
81
|
end
|
78
82
|
|
79
83
|
def test_scanner_eof_in_meta_sequence
|
80
84
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M') }
|
81
85
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-') }
|
82
86
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\\') }
|
87
|
+
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\c') }
|
83
88
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C') }
|
84
89
|
assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C-') }
|
85
90
|
end
|
@@ -29,9 +29,12 @@ class ScannerEscapes < Test::Unit::TestCase
|
|
29
29
|
|
30
30
|
/a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
|
31
31
|
/a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
|
32
|
+
/a\c\M-Bc/n => [1, :escape, :control, '\c\M-B', 1, 7],
|
33
|
+
/a\C-\M-Bc/n => [1, :escape, :control, '\C-\M-B', 1, 8],
|
32
34
|
|
33
35
|
/a\M-Bc/n => [1, :escape, :meta_sequence, '\M-B', 1, 5],
|
34
36
|
/a\M-\C-Bc/n => [1, :escape, :meta_sequence, '\M-\C-B', 1, 8],
|
37
|
+
/a\M-\cBc/n => [1, :escape, :meta_sequence, '\M-\cB', 1, 7],
|
35
38
|
|
36
39
|
'ab\\\xcd' => [1, :escape, :backslash, '\\\\', 2, 4],
|
37
40
|
'ab\\\0cd' => [1, :escape, :backslash, '\\\\', 2, 4],
|
@@ -317,5 +317,13 @@ class ScannerProperties < Test::Unit::TestCase
|
|
317
317
|
assert_equal :nonproperty, result[0]
|
318
318
|
assert_equal token, result[1]
|
319
319
|
end
|
320
|
+
|
321
|
+
define_method "test_scan_double_negated_property_#{token}_#{count}" do
|
322
|
+
tokens = RS.scan("a\\P{^#{property}}c")
|
323
|
+
result = tokens.at(1)
|
324
|
+
|
325
|
+
assert_equal :property, result[0]
|
326
|
+
assert_equal token, result[1]
|
327
|
+
end
|
320
328
|
end
|
321
329
|
end
|
data/test/scanner/test_sets.rb
CHANGED
@@ -20,6 +20,8 @@ class ScannerSets < Test::Unit::TestCase
|
|
20
20
|
'[<]' => [1, :set, :member, '<', 1, 2],
|
21
21
|
'[>]' => [1, :set, :member, '>', 1, 2],
|
22
22
|
|
23
|
+
'[\x20]' => [1, :set, :member_hex, '\x20', 1, 5],
|
24
|
+
|
23
25
|
'[\.]' => [1, :set, :escape, '\.', 1, 3],
|
24
26
|
'[\!]' => [1, :set, :escape, '\!', 1, 3],
|
25
27
|
'[\#]' => [1, :set, :escape, '\#', 1, 3],
|
@@ -40,6 +42,9 @@ class ScannerSets < Test::Unit::TestCase
|
|
40
42
|
'[\w]' => [1, :set, :type_word, '\w', 1, 3],
|
41
43
|
'[\W]' => [1, :set, :type_nonword, '\W', 1, 3],
|
42
44
|
|
45
|
+
'[\R]' => [1, :set, :type_linebreak, '\R', 1, 3],
|
46
|
+
'[\X]' => [1, :set, :type_xgrapheme, '\X', 1, 3],
|
47
|
+
|
43
48
|
'[a-c]' => [1, :set, :range, 'a-c', 1, 4],
|
44
49
|
'[a-c-]' => [2, :set, :member, '-', 4, 6],
|
45
50
|
'[a-c^]' => [2, :set, :member, '^', 4, 5],
|
@@ -58,6 +63,8 @@ class ScannerSets < Test::Unit::TestCase
|
|
58
63
|
|
59
64
|
'[a\p{digit}c]' => [2, :set, :digit, '\p{digit}', 2, 11],
|
60
65
|
'[a\P{digit}c]' => [2, :set, :digit, '\P{digit}', 2, 11],
|
66
|
+
'[a\p{^digit}c]' => [2, :set, :digit, '\p{^digit}', 2, 12],
|
67
|
+
'[a\P{^digit}c]' => [2, :set, :digit, '\P{^digit}', 2, 12],
|
61
68
|
|
62
69
|
'[a\p{ALPHA}c]' => [2, :set, :alpha, '\p{ALPHA}', 2, 11],
|
63
70
|
'[a\p{P}c]' => [2, :set, :punct_any, '\p{P}', 2, 7],
|
data/test/scanner/test_types.rb
CHANGED
@@ -14,6 +14,9 @@ class ScannerTypes < Test::Unit::TestCase
|
|
14
14
|
|
15
15
|
'a\wc' => [1, :type, :word, '\w', 1, 3],
|
16
16
|
'a\Wc' => [1, :type, :nonword, '\W', 1, 3],
|
17
|
+
|
18
|
+
'a\Rc' => [1, :type, :linebreak, '\R', 1, 3],
|
19
|
+
'a\Xc' => [1, :type, :xgrapheme, '\X', 1, 3],
|
17
20
|
}
|
18
21
|
|
19
22
|
tests.each do |(pattern, (index, type, token, text, ts, te))|
|
@@ -10,7 +10,8 @@ class TestSyntaxRuby_V191 < Test::Unit::TestCase
|
|
10
10
|
tests = {
|
11
11
|
:implements => {
|
12
12
|
:escape => [
|
13
|
-
Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode
|
13
|
+
Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode +
|
14
|
+
Escape::Hex + Escape::Octal
|
14
15
|
].flatten,
|
15
16
|
|
16
17
|
:type => [
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.4.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ammar Ali
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
14
14
|
email:
|
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
197
|
version: '0'
|
198
198
|
requirements: []
|
199
199
|
rubyforge_project:
|
200
|
-
rubygems_version: 2.6.
|
200
|
+
rubygems_version: 2.6.11
|
201
201
|
signing_key:
|
202
202
|
specification_version: 4
|
203
203
|
summary: Scanner, lexer, parser for ruby's regular expressions
|