regexp_parser 0.4.5 → 0.4.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,7 +15,7 @@ module Regexp::Syntax
15
15
  implements :backref, Backreference::All +
16
16
  SubexpressionCall::All
17
17
 
18
- implements :escape, Escape::Unicode + Escape::Hex
18
+ implements :escape, Escape::Unicode + Escape::Hex + Escape::Octal
19
19
 
20
20
  implements :type, CharacterType::Hex
21
21
 
@@ -12,6 +12,10 @@ module Regexp::Syntax
12
12
  implements :conditional, Conditional::All
13
13
  implements :property, UnicodeProperty::V200
14
14
  implements :nonproperty, UnicodeProperty::V200
15
+
16
+ implements :type, CharacterType::Clustered
17
+ implements :set, CharacterSet::Clustered
18
+ implements :subset, CharacterSet::Clustered
15
19
  end
16
20
  end
17
21
 
@@ -5,11 +5,14 @@ module Regexp::Syntax
5
5
  OpenClose = [:open, :close]
6
6
 
7
7
  Basic = [:negate, :member, :range]
8
- Extended = Basic + [:escape, :intersection, :range_hex, :backspace]
8
+ Extended = Basic + [:escape, :intersection, :backspace,
9
+ :member_hex, :range_hex]
9
10
 
10
11
  Types = [:type_digit, :type_nondigit, :type_hex, :type_nonhex,
11
12
  :type_space, :type_nonspace, :type_word, :type_nonword]
12
13
 
14
+ Clustered = [:type_linebreak, :type_xgrapheme]
15
+
13
16
  module POSIX
14
17
  Standard = [
15
18
  :class_alnum, :class_alpha, :class_blank, :class_cntrl,
@@ -30,7 +33,7 @@ module Regexp::Syntax
30
33
  All = Standard + StandardNegative + Extensions + ExtensionsNegative
31
34
  end
32
35
 
33
- All = Basic + Extended + Types + POSIX::All
36
+ All = Basic + Extended + Types + Clustered + POSIX::All
34
37
  Type = :set
35
38
 
36
39
  module SubSet
@@ -6,7 +6,9 @@ module Regexp::Syntax
6
6
  Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
7
7
  Hex = [:hex, :nonhex]
8
8
 
9
- All = Basic + Extended + Hex
9
+ Clustered = [:linebreak, :xgrapheme]
10
+
11
+ All = Basic + Extended + Hex + Clustered
10
12
  Type = :type
11
13
  end
12
14
 
@@ -23,6 +23,8 @@ module Regexp::Syntax
23
23
 
24
24
  Hex = [:hex]
25
25
 
26
+ Octal = [:octal]
27
+
26
28
  All = Basic + Backreference + ASCII + Meta
27
29
  Type = :escape
28
30
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  module Parser
3
- VERSION = '0.4.5'
3
+ VERSION = '0.4.6'
4
4
  end
5
5
  end
@@ -32,6 +32,9 @@ class TestParserEscapes < Test::Unit::TestCase
32
32
 
33
33
  # hex escapes
34
34
  /a\xFF/n => [1, :escape, :hex, EscapeSequence::Literal],
35
+
36
+ # octal escapes
37
+ /a\177/n => [1, :escape, :octal, EscapeSequence::Literal],
35
38
  }
36
39
 
37
40
  tests.each_with_index do |(pattern, (index, type, token, klass)), count|
@@ -75,4 +78,25 @@ class TestParserEscapes < Test::Unit::TestCase
75
78
  assert_equal '\\M-\\C-X', root[2].text
76
79
  end
77
80
 
81
+ def test_parse_lower_c_meta_control_sequence
82
+ root = RP.parse(/\A\\\M-\cX/n)
83
+
84
+ assert_equal EscapeSequence::MetaControl, root[2].class
85
+ assert_equal '\\M-\\cX', root[2].text
86
+ end
87
+
88
+ def test_parse_escape_reverse_meta_control_sequence
89
+ root = RP.parse(/\A\\\C-\M-X/n)
90
+
91
+ assert_equal EscapeSequence::MetaControl, root[2].class
92
+ assert_equal '\\C-\\M-X', root[2].text
93
+ end
94
+
95
+ def test_parse_escape_reverse_lower_c_meta_control_sequence
96
+ root = RP.parse(/\A\\\c\M-X/n)
97
+
98
+ assert_equal EscapeSequence::MetaControl, root[2].class
99
+ assert_equal '\\c\\M-X', root[2].text
100
+ end
101
+
78
102
  end
@@ -318,6 +318,18 @@ class ParserProperties < Test::Unit::TestCase
318
318
  assert_equal true, t.expressions[1].negative?
319
319
  end
320
320
 
321
+ def test_parse_caret_nonproperty_negative
322
+ t = RP.parse 'ab\p{^L}cd', 'ruby/1.9'
323
+
324
+ assert_equal true, t.expressions[1].negative?
325
+ end
326
+
327
+ def test_parse_double_negated_property_negative
328
+ t = RP.parse 'ab\P{^L}cd', 'ruby/1.9'
329
+
330
+ assert_equal false, t.expressions[1].negative?
331
+ end
332
+
321
333
  def test_parse_property_age
322
334
  t = RP.parse 'ab\p{age=5.2}cd', 'ruby/1.9'
323
335
 
@@ -39,6 +39,16 @@ class TestParserSets < Test::Unit::TestCase
39
39
  assert_equal false, exp.include?(']')
40
40
  end
41
41
 
42
+ def test_parse_hex_members
43
+ root = RP.parse('[\x20\x24-\x26\x28]', :any)
44
+ exp = root.expressions.at(0)
45
+
46
+ assert_equal true, exp.include?('\x20')
47
+ assert_equal true, exp.include?('\x24-\x26')
48
+ assert_equal true, exp.include?('\x28')
49
+ assert_equal false, exp.include?(']')
50
+ end
51
+
42
52
  def test_parse_chat_type_set_members
43
53
  root = RP.parse('[\da-z]', :any)
44
54
  exp = root.expressions.at(0)
@@ -29,4 +29,22 @@ class TestParserTypes < Test::Unit::TestCase
29
29
  end
30
30
  end
31
31
 
32
+ tests_2_0 = {
33
+ 'a\Rc' => [1, :type, :linebreak, CharacterType::Linebreak],
34
+ 'a\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme],
35
+ }
36
+
37
+ tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
38
+ define_method "test_parse_type_#{token}_#{count}" do
39
+ root = RP.parse(pattern, 'ruby/2.0')
40
+ exp = root.expressions.at(index)
41
+
42
+ assert exp.is_a?( klass ),
43
+ "Expected #{klass}, but got #{exp.class.name}"
44
+
45
+ assert_equal type, exp.type
46
+ assert_equal token, exp.token
47
+ end
48
+ end
49
+
32
50
  end
@@ -72,14 +72,19 @@ class ScannerErrors < Test::Unit::TestCase
72
72
 
73
73
  def test_scanner_eof_in_control_sequence
74
74
  assert_raise( RS::PrematureEndError ) { RS.scan('\c') }
75
+ assert_raise( RS::PrematureEndError ) { RS.scan('\c\M') }
76
+ assert_raise( RS::PrematureEndError ) { RS.scan('\c\M-') }
75
77
  assert_raise( RS::PrematureEndError ) { RS.scan('\C') }
76
78
  assert_raise( RS::PrematureEndError ) { RS.scan('\C-') }
79
+ assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M') }
80
+ assert_raise( RS::PrematureEndError ) { RS.scan('\C-\M-') }
77
81
  end
78
82
 
79
83
  def test_scanner_eof_in_meta_sequence
80
84
  assert_raise( RS::PrematureEndError ) { RS.scan('\M') }
81
85
  assert_raise( RS::PrematureEndError ) { RS.scan('\M-') }
82
86
  assert_raise( RS::PrematureEndError ) { RS.scan('\M-\\') }
87
+ assert_raise( RS::PrematureEndError ) { RS.scan('\M-\c') }
83
88
  assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C') }
84
89
  assert_raise( RS::PrematureEndError ) { RS.scan('\M-\C-') }
85
90
  end
@@ -29,9 +29,12 @@ class ScannerEscapes < Test::Unit::TestCase
29
29
 
30
30
  /a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
31
31
  /a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
32
+ /a\c\M-Bc/n => [1, :escape, :control, '\c\M-B', 1, 7],
33
+ /a\C-\M-Bc/n => [1, :escape, :control, '\C-\M-B', 1, 8],
32
34
 
33
35
  /a\M-Bc/n => [1, :escape, :meta_sequence, '\M-B', 1, 5],
34
36
  /a\M-\C-Bc/n => [1, :escape, :meta_sequence, '\M-\C-B', 1, 8],
37
+ /a\M-\cBc/n => [1, :escape, :meta_sequence, '\M-\cB', 1, 7],
35
38
 
36
39
  'ab\\\xcd' => [1, :escape, :backslash, '\\\\', 2, 4],
37
40
  'ab\\\0cd' => [1, :escape, :backslash, '\\\\', 2, 4],
@@ -317,5 +317,13 @@ class ScannerProperties < Test::Unit::TestCase
317
317
  assert_equal :nonproperty, result[0]
318
318
  assert_equal token, result[1]
319
319
  end
320
+
321
+ define_method "test_scan_double_negated_property_#{token}_#{count}" do
322
+ tokens = RS.scan("a\\P{^#{property}}c")
323
+ result = tokens.at(1)
324
+
325
+ assert_equal :property, result[0]
326
+ assert_equal token, result[1]
327
+ end
320
328
  end
321
329
  end
@@ -20,6 +20,8 @@ class ScannerSets < Test::Unit::TestCase
20
20
  '[<]' => [1, :set, :member, '<', 1, 2],
21
21
  '[>]' => [1, :set, :member, '>', 1, 2],
22
22
 
23
+ '[\x20]' => [1, :set, :member_hex, '\x20', 1, 5],
24
+
23
25
  '[\.]' => [1, :set, :escape, '\.', 1, 3],
24
26
  '[\!]' => [1, :set, :escape, '\!', 1, 3],
25
27
  '[\#]' => [1, :set, :escape, '\#', 1, 3],
@@ -40,6 +42,9 @@ class ScannerSets < Test::Unit::TestCase
40
42
  '[\w]' => [1, :set, :type_word, '\w', 1, 3],
41
43
  '[\W]' => [1, :set, :type_nonword, '\W', 1, 3],
42
44
 
45
+ '[\R]' => [1, :set, :type_linebreak, '\R', 1, 3],
46
+ '[\X]' => [1, :set, :type_xgrapheme, '\X', 1, 3],
47
+
43
48
  '[a-c]' => [1, :set, :range, 'a-c', 1, 4],
44
49
  '[a-c-]' => [2, :set, :member, '-', 4, 6],
45
50
  '[a-c^]' => [2, :set, :member, '^', 4, 5],
@@ -58,6 +63,8 @@ class ScannerSets < Test::Unit::TestCase
58
63
 
59
64
  '[a\p{digit}c]' => [2, :set, :digit, '\p{digit}', 2, 11],
60
65
  '[a\P{digit}c]' => [2, :set, :digit, '\P{digit}', 2, 11],
66
+ '[a\p{^digit}c]' => [2, :set, :digit, '\p{^digit}', 2, 12],
67
+ '[a\P{^digit}c]' => [2, :set, :digit, '\P{^digit}', 2, 12],
61
68
 
62
69
  '[a\p{ALPHA}c]' => [2, :set, :alpha, '\p{ALPHA}', 2, 11],
63
70
  '[a\p{P}c]' => [2, :set, :punct_any, '\p{P}', 2, 7],
@@ -14,6 +14,9 @@ class ScannerTypes < Test::Unit::TestCase
14
14
 
15
15
  'a\wc' => [1, :type, :word, '\w', 1, 3],
16
16
  'a\Wc' => [1, :type, :nonword, '\W', 1, 3],
17
+
18
+ 'a\Rc' => [1, :type, :linebreak, '\R', 1, 3],
19
+ 'a\Xc' => [1, :type, :xgrapheme, '\X', 1, 3],
17
20
  }
18
21
 
19
22
  tests.each do |(pattern, (index, type, token, text, ts, te))|
@@ -10,7 +10,8 @@ class TestSyntaxRuby_V191 < Test::Unit::TestCase
10
10
  tests = {
11
11
  :implements => {
12
12
  :escape => [
13
- Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode
13
+ Escape::Backreference + Escape::ASCII + Escape::Meta + Escape::Unicode +
14
+ Escape::Hex + Escape::Octal
14
15
  ].flatten,
15
16
 
16
17
  :type => [
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-17 00:00:00.000000000 Z
11
+ date: 2017-09-18 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -197,7 +197,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
197
  version: '0'
198
198
  requirements: []
199
199
  rubyforge_project:
200
- rubygems_version: 2.6.13
200
+ rubygems_version: 2.6.11
201
201
  signing_key:
202
202
  specification_version: 4
203
203
  summary: Scanner, lexer, parser for ruby's regular expressions