regexp_parser 1.5.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -0
  3. data/Gemfile +3 -3
  4. data/README.md +14 -6
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +6 -43
  7. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  9. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  10. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  11. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  12. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  13. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  14. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  15. data/lib/regexp_parser/expression/sequence.rb +3 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  17. data/lib/regexp_parser/lexer.rb +4 -25
  18. data/lib/regexp_parser/parser.rb +40 -33
  19. data/lib/regexp_parser/scanner.rb +1208 -1353
  20. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  21. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  22. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  23. data/lib/regexp_parser/scanner/scanner.rl +116 -202
  24. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  25. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  26. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  27. data/lib/regexp_parser/version.rb +1 -1
  28. data/spec/expression/base_spec.rb +14 -0
  29. data/spec/expression/methods/match_length_spec.rb +20 -0
  30. data/spec/expression/methods/match_spec.rb +25 -0
  31. data/spec/expression/methods/tests_spec.rb +2 -0
  32. data/spec/expression/methods/traverse_spec.rb +21 -0
  33. data/spec/expression/options_spec.rb +128 -0
  34. data/spec/expression/root_spec.rb +9 -0
  35. data/spec/expression/sequence_spec.rb +9 -0
  36. data/spec/lexer/conditionals_spec.rb +49 -119
  37. data/spec/lexer/delimiters_spec.rb +68 -0
  38. data/spec/lexer/escapes_spec.rb +8 -32
  39. data/spec/lexer/keep_spec.rb +5 -17
  40. data/spec/lexer/literals_spec.rb +73 -110
  41. data/spec/lexer/nesting_spec.rb +86 -117
  42. data/spec/lexer/refcalls_spec.rb +51 -50
  43. data/spec/parser/all_spec.rb +13 -1
  44. data/spec/parser/anchors_spec.rb +9 -23
  45. data/spec/parser/conditionals_spec.rb +9 -9
  46. data/spec/parser/errors_spec.rb +22 -43
  47. data/spec/parser/escapes_spec.rb +33 -44
  48. data/spec/parser/free_space_spec.rb +25 -4
  49. data/spec/parser/groups_spec.rb +98 -257
  50. data/spec/parser/keep_spec.rb +2 -15
  51. data/spec/parser/options_spec.rb +28 -0
  52. data/spec/parser/posix_classes_spec.rb +5 -24
  53. data/spec/parser/properties_spec.rb +42 -54
  54. data/spec/parser/quantifiers_spec.rb +42 -283
  55. data/spec/parser/refcalls_spec.rb +60 -185
  56. data/spec/parser/set/intersections_spec.rb +17 -17
  57. data/spec/parser/set/ranges_spec.rb +17 -17
  58. data/spec/parser/sets_spec.rb +5 -5
  59. data/spec/parser/types_spec.rb +11 -36
  60. data/spec/scanner/anchors_spec.rb +13 -28
  61. data/spec/scanner/conditionals_spec.rb +121 -173
  62. data/spec/scanner/delimiters_spec.rb +52 -0
  63. data/spec/scanner/errors_spec.rb +64 -87
  64. data/spec/scanner/escapes_spec.rb +53 -50
  65. data/spec/scanner/free_space_spec.rb +102 -165
  66. data/spec/scanner/groups_spec.rb +45 -64
  67. data/spec/scanner/keep_spec.rb +5 -28
  68. data/spec/scanner/literals_spec.rb +45 -81
  69. data/spec/scanner/meta_spec.rb +13 -33
  70. data/spec/scanner/options_spec.rb +36 -0
  71. data/spec/scanner/properties_spec.rb +43 -286
  72. data/spec/scanner/quantifiers_spec.rb +13 -28
  73. data/spec/scanner/refcalls_spec.rb +32 -48
  74. data/spec/scanner/sets_spec.rb +88 -102
  75. data/spec/scanner/types_spec.rb +10 -25
  76. data/spec/spec_helper.rb +1 -0
  77. data/spec/support/shared_examples.rb +77 -0
  78. data/spec/syntax/syntax_spec.rb +4 -0
  79. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  80. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  81. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  82. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  83. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  84. data/spec/syntax/versions/aliases_spec.rb +27 -109
  85. metadata +28 -10
  86. data/spec/scanner/scripts_spec.rb +0 -49
  87. data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -1,54 +1,55 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('RefCall lexing') do
4
- tests = {
5
- # Traditional numerical group back-reference
6
- '(abc)\1' => [3, :backref, :number, '\1', 5, 7, 0, 0, 0],
7
-
8
- # Group back-references, named, numbered, and relative
9
- '(?<X>abc)\k<X>' => [3, :backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0],
10
- "(?<X>abc)\\k'X'" => [3, :backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0],
11
-
12
- '(abc)\k<1>' => [3, :backref, :number_ref, '\k<1>', 5, 10, 0, 0, 0],
13
- "(abc)\\k'1'" => [3, :backref, :number_ref, "\\k'1'", 5, 10, 0, 0, 0],
14
-
15
- '(abc)\k<-1>' => [3, :backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0, 0],
16
- "(abc)\\k'-1'" => [3, :backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0, 0],
17
-
18
- # Sub-expression invocation, named, numbered, and relative
19
- '(?<X>abc)\g<X>' => [3, :backref, :name_call, '\g<X>', 9, 14, 0, 0, 0],
20
- "(?<X>abc)\\g'X'" => [3, :backref, :name_call, "\\g'X'", 9, 14, 0, 0, 0],
21
-
22
- '(abc)\g<1>' => [3, :backref, :number_call, '\g<1>', 5, 10, 0, 0, 0],
23
- "(abc)\\g'1'" => [3, :backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0],
24
-
25
- '(abc)\g<-1>' => [3, :backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0],
26
- "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
27
-
28
- '(abc)\g<+1>' => [3, :backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0],
29
- "(abc)\\g'+1'" => [3, :backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0],
30
-
31
- # Group back-references, with nesting level
32
- '(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
33
- "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
34
-
35
- '(abc)\k<1-0>' => [3, :backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0],
36
- "(abc)\\k'1-0'" => [3, :backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0],
37
- }
38
-
39
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
40
- specify("lexer_#{type}_#{token}_#{count}") do
41
- tokens = RL.lex(pattern, 'ruby/1.9')
42
- struct = tokens.at(index)
43
-
44
- expect(struct.type).to eq type
45
- expect(struct.token).to eq token
46
- expect(struct.text).to eq text
47
- expect(struct.ts).to eq ts
48
- expect(struct.te).to eq te
49
- expect(struct.level).to eq level
50
- expect(struct.set_level).to eq set_level
51
- expect(struct.conditional_level).to eq conditional_level
52
- end
53
- end
4
+ # Traditional numerical group back-reference
5
+ include_examples 'lex', '(abc)\1',
6
+ 3 => [:backref, :number, '\1', 5, 7, 0, 0, 0]
7
+
8
+ # Group back-references, named, numbered, and relative
9
+ include_examples 'lex', '(?<X>abc)\k<X>',
10
+ 3 => [:backref, :name_ref, '\k<X>', 9, 14, 0, 0, 0]
11
+ include_examples 'lex', "(?<X>abc)\\k'X'",
12
+ 3 => [:backref, :name_ref, "\\k'X'", 9, 14, 0, 0, 0]
13
+
14
+ include_examples 'lex', '(abc)\k<1>',
15
+ 3 => [:backref, :number_ref, '\k<1>', 5, 10, 0, 0, 0]
16
+ include_examples 'lex', "(abc)\\k'1'",
17
+ 3 => [:backref, :number_ref, "\\k'1'", 5, 10, 0, 0, 0]
18
+
19
+ include_examples 'lex', '(abc)\k<-1>',
20
+ 3 => [:backref, :number_rel_ref, '\k<-1>', 5, 11, 0, 0, 0]
21
+ include_examples 'lex', "(abc)\\k'-1'",
22
+ 3 => [:backref, :number_rel_ref, "\\k'-1'", 5, 11, 0, 0, 0]
23
+
24
+ # Sub-expression invocation, named, numbered, and relative
25
+ include_examples 'lex', '(?<X>abc)\g<X>',
26
+ 3 => [:backref, :name_call, '\g<X>', 9, 14, 0, 0, 0]
27
+ include_examples 'lex', "(?<X>abc)\\g'X'",
28
+ 3 => [:backref, :name_call, "\\g'X'", 9, 14, 0, 0, 0]
29
+
30
+ include_examples 'lex', '(abc)\g<1>',
31
+ 3 => [:backref, :number_call, '\g<1>', 5, 10, 0, 0, 0]
32
+ include_examples 'lex', "(abc)\\g'1'",
33
+ 3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
+
35
+ include_examples 'lex', '(abc)\g<-1>',
36
+ 3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
+ include_examples 'lex', "(abc)\\g'-1'",
38
+ 3 => [:backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0]
39
+
40
+ include_examples 'lex', '(abc)\g<+1>',
41
+ 3 => [:backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0]
42
+ include_examples 'lex', "(abc)\\g'+1'",
43
+ 3 => [:backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0]
44
+
45
+ # Group back-references, with nesting level
46
+ include_examples 'lex', '(?<X>abc)\k<X-0>',
47
+ 3 => [:backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0]
48
+ include_examples 'lex', "(?<X>abc)\\k'X-0'",
49
+ 3 => [:backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0]
50
+
51
+ include_examples 'lex', '(abc)\k<1-0>',
52
+ 3 => [:backref, :number_recursion_ref, '\k<1-0>', 5, 12, 0, 0, 0]
53
+ include_examples 'lex', "(abc)\\k'1-0'",
54
+ 3 => [:backref, :number_recursion_ref, "\\k'1-0'", 5, 12, 0, 0, 0]
54
55
  end
@@ -2,7 +2,11 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe(Regexp::Parser) do
4
4
  specify('parse returns a root expression') do
5
- expect(RP.parse('abc')).to be_instance_of(Regexp::Expression::Root)
5
+ expect(RP.parse('abc')).to be_instance_of(Root)
6
+ end
7
+
8
+ specify('parse can be called with block') do
9
+ expect(RP.parse('abc') { |root| root.class }).to eq Root
6
10
  end
7
11
 
8
12
  specify('parse root contains expressions') do
@@ -10,6 +14,14 @@ RSpec.describe(Regexp::Parser) do
10
14
  expect(root.expressions).to all(be_a Regexp::Expression::Base)
11
15
  end
12
16
 
17
+ specify('parse root options mi') do
18
+ root = RP.parse(/[abc]/mi, 'ruby/1.8')
19
+
20
+ expect(root.m?).to be true
21
+ expect(root.i?).to be true
22
+ expect(root.x?).to be false
23
+ end
24
+
13
25
  specify('parse node types') do
14
26
  root = RP.parse('^(one){2,3}([^d\\]efm-qz\\,\\-]*)(ghi)+$')
15
27
 
@@ -1,31 +1,17 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Anchor parsing') do
4
- tests = {
5
- '^a' => [0, :anchor, :bol, Anchor::BOL],
6
- 'a$' => [1, :anchor, :eol, Anchor::EOL],
4
+ include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
5
+ include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
7
6
 
8
- '\Aa' => [0, :anchor, :bos, Anchor::BOS],
9
- 'a\z' => [1, :anchor, :eos, Anchor::EOS],
10
- 'a\Z' => [1, :anchor, :eos_ob_eol, Anchor::EOSobEOL],
7
+ include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
8
+ include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
9
+ include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
11
10
 
12
- 'a\b' => [1, :anchor, :word_boundary, Anchor::WordBoundary],
13
- 'a\B' => [1, :anchor, :nonword_boundary, Anchor::NonWordBoundary],
11
+ include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
12
+ include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
14
13
 
15
- 'a\G' => [1, :anchor, :match_start, Anchor::MatchStart],
14
+ include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
16
15
 
17
- "\\\\Aa" => [0, :escape, :backslash, EscapeSequence::Literal],
18
- }
19
-
20
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
21
- specify("parse_anchor_#{token}_#{count}") do
22
- root = RP.parse(pattern, 'ruby/1.9')
23
- exp = root.expressions.at(index)
24
-
25
- expect(exp).to be_a(klass)
26
-
27
- expect(exp.type).to eq type
28
- expect(exp.token).to eq token
29
- end
30
- end
16
+ include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
31
17
  end
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Conditional parsing') do
4
4
  specify('parse conditional') do
5
- regexp = Regexp.new('(?<A>a)(?(<A>)T|F)/')
5
+ regexp = /(?<A>a)(?(<A>)T|F)/
6
6
 
7
7
  root = RP.parse(regexp, 'ruby/2.0')
8
8
  exp = root[1]
@@ -16,7 +16,7 @@ RSpec.describe('Conditional parsing') do
16
16
  end
17
17
 
18
18
  specify('parse conditional condition') do
19
- regexp = Regexp.new('(?<A>a)(?(<A>)T|F)/')
19
+ regexp = /(?<A>a)(?(<A>)T|F)/
20
20
 
21
21
  root = RP.parse(regexp, 'ruby/2.0')
22
22
  exp = root[1].condition
@@ -31,7 +31,7 @@ RSpec.describe('Conditional parsing') do
31
31
  end
32
32
 
33
33
  specify('parse conditional condition with number ref') do
34
- regexp = Regexp.new('(a)(?(1)T|F)/')
34
+ regexp = /(a)(?(1)T|F)/
35
35
 
36
36
  root = RP.parse(regexp, 'ruby/2.0')
37
37
  exp = root[1].condition
@@ -46,7 +46,7 @@ RSpec.describe('Conditional parsing') do
46
46
  end
47
47
 
48
48
  specify('parse conditional nested groups') do
49
- regexp = Regexp.new('((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/')
49
+ regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
50
50
 
51
51
  root = RP.parse(regexp, 'ruby/2.0')
52
52
 
@@ -80,7 +80,7 @@ RSpec.describe('Conditional parsing') do
80
80
  end
81
81
 
82
82
  specify('parse conditional nested') do
83
- regexp = Regexp.new('(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/')
83
+ regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
84
84
 
85
85
  root = RP.parse(regexp, 'ruby/2.0')
86
86
 
@@ -102,7 +102,7 @@ RSpec.describe('Conditional parsing') do
102
102
  end
103
103
 
104
104
  specify('parse conditional nested alternation') do
105
- regexp = Regexp.new('(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/')
105
+ regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
106
106
 
107
107
  root = RP.parse(regexp, 'ruby/2.0')
108
108
 
@@ -125,7 +125,7 @@ RSpec.describe('Conditional parsing') do
125
125
  end
126
126
 
127
127
  specify('parse conditional extra separator') do
128
- regexp = Regexp.new('(?<A>a)(?(<A>)T|)/')
128
+ regexp = /(?<A>a)(?(<A>)T|)/
129
129
 
130
130
  root = RP.parse(regexp, 'ruby/2.0')
131
131
  branches = root[1].branches
@@ -146,7 +146,7 @@ RSpec.describe('Conditional parsing') do
146
146
  end
147
147
 
148
148
  specify('parse conditional quantified') do
149
- regexp = Regexp.new('(foo)(?(1)\d|(\w)){42}/')
149
+ regexp = /(foo)(?(1)\d|(\w)){42}/
150
150
 
151
151
  root = RP.parse(regexp, 'ruby/2.0')
152
152
  conditional = root[1]
@@ -158,7 +158,7 @@ RSpec.describe('Conditional parsing') do
158
158
  end
159
159
 
160
160
  specify('parse conditional branch content quantified') do
161
- regexp = Regexp.new('(foo)(?(1)\d{23}|(\w){42})/')
161
+ regexp = /(foo)(?(1)\d{23}|(\w){42})/
162
162
 
163
163
  root = RP.parse(regexp, 'ruby/2.0')
164
164
  conditional = root[1]
@@ -1,51 +1,30 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Parsing errors') do
4
- let(:rp) { Regexp::Parser.new }
5
- before { rp.parse(/foo/) }
4
+ let(:parser) { Regexp::Parser.new }
5
+ before { parser.parse(/foo/) } # initializes ivars
6
6
 
7
- specify('parser unknown token type') do
8
- expect { rp.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
7
+ it('raises UnknownTokenTypeError for unknown token types') do
8
+ expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
9
9
  .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
10
  end
11
11
 
12
- specify('parser unknown set token') do
13
- expect { rp.send(:parse_token, Regexp::Token.new(:set, :foo)) }
14
- .to raise_error(Regexp::Parser::UnknownTokenError)
15
- end
16
-
17
- specify('parser unknown meta token') do
18
- expect { rp.send(:parse_token, Regexp::Token.new(:meta, :foo)) }
19
- .to raise_error(Regexp::Parser::UnknownTokenError)
20
- end
21
-
22
- specify('parser unknown character type token') do
23
- expect { rp.send(:parse_token, Regexp::Token.new(:type, :foo)) }
24
- .to raise_error(Regexp::Parser::UnknownTokenError)
25
- end
26
-
27
- specify('parser unknown unicode property token') do
28
- expect { rp.send(:parse_token, Regexp::Token.new(:property, :foo)) }
29
- .to raise_error(Regexp::Parser::UnknownTokenError)
30
- end
31
-
32
- specify('parser unknown unicode nonproperty token') do
33
- expect { rp.send(:parse_token, Regexp::Token.new(:nonproperty, :foo)) }
34
- .to raise_error(Regexp::Parser::UnknownTokenError)
35
- end
36
-
37
- specify('parser unknown anchor token') do
38
- expect { rp.send(:parse_token, Regexp::Token.new(:anchor, :foo)) }
39
- .to raise_error(Regexp::Parser::UnknownTokenError)
40
- end
41
-
42
- specify('parser unknown quantifier token') do
43
- expect { rp.send(:parse_token, Regexp::Token.new(:quantifier, :foo)) }
44
- .to raise_error(Regexp::Parser::UnknownTokenError)
45
- end
46
-
47
- specify('parser unknown group open token') do
48
- expect { rp.send(:parse_token, Regexp::Token.new(:group, :foo)) }
49
- .to raise_error(Regexp::Parser::UnknownTokenError)
50
- end
12
+ RSpec.shared_examples 'UnknownTokenError' do |type, token|
13
+ it "raises for unkown tokens of type #{type}" do
14
+ expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
+ .to raise_error(Regexp::Parser::UnknownTokenError)
16
+ end
17
+ end
18
+
19
+ include_examples 'UnknownTokenError', :anchor
20
+ include_examples 'UnknownTokenError', :backref
21
+ include_examples 'UnknownTokenError', :conditional
22
+ include_examples 'UnknownTokenError', :free_space
23
+ include_examples 'UnknownTokenError', :group
24
+ include_examples 'UnknownTokenError', :meta
25
+ include_examples 'UnknownTokenError', :nonproperty
26
+ include_examples 'UnknownTokenError', :property
27
+ include_examples 'UnknownTokenError', :quantifier
28
+ include_examples 'UnknownTokenError', :set
29
+ include_examples 'UnknownTokenError', :type
51
30
  end
@@ -1,49 +1,35 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Escape parsing') do
4
- tests = {
5
- /a\ac/ => [1, :escape, :bell, EscapeSequence::Bell],
6
- /a\ec/ => [1, :escape, :escape, EscapeSequence::AsciiEscape],
7
- /a\fc/ => [1, :escape, :form_feed, EscapeSequence::FormFeed],
8
- /a\nc/ => [1, :escape, :newline, EscapeSequence::Newline],
9
- /a\rc/ => [1, :escape, :carriage, EscapeSequence::Return],
10
- /a\tc/ => [1, :escape, :tab, EscapeSequence::Tab],
11
- /a\vc/ => [1, :escape, :vertical_tab, EscapeSequence::VerticalTab],
12
-
13
- # meta character escapes
14
- /a\.c/ => [1, :escape, :dot, EscapeSequence::Literal],
15
- /a\?c/ => [1, :escape, :zero_or_one, EscapeSequence::Literal],
16
- /a\*c/ => [1, :escape, :zero_or_more, EscapeSequence::Literal],
17
- /a\+c/ => [1, :escape, :one_or_more, EscapeSequence::Literal],
18
- /a\|c/ => [1, :escape, :alternation, EscapeSequence::Literal],
19
- /a\(c/ => [1, :escape, :group_open, EscapeSequence::Literal],
20
- /a\)c/ => [1, :escape, :group_close, EscapeSequence::Literal],
21
- /a\{c/ => [1, :escape, :interval_open, EscapeSequence::Literal],
22
- /a\}c/ => [1, :escape, :interval_close, EscapeSequence::Literal],
23
-
24
- # unicode escapes
25
- /a\u0640/ => [1, :escape, :codepoint, EscapeSequence::Codepoint],
26
- /a\u{41 1F60D}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
27
- /a\u{10FFFF}/ => [1, :escape, :codepoint_list, EscapeSequence::CodepointList],
28
-
29
- # hex escapes
30
- /a\xFF/n => [1, :escape, :hex, EscapeSequence::Hex],
31
-
32
- # octal escapes
33
- /a\177/n => [1, :escape, :octal, EscapeSequence::Octal],
34
- }
35
-
36
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
37
- specify("parse_escape_#{token}_#{count = (count + 1)}") do
38
- root = RP.parse(pattern, 'ruby/1.9')
39
- exp = root.expressions.at(index)
40
-
41
- expect(exp).to be_a(klass)
42
-
43
- expect(exp.type).to eq type
44
- expect(exp.token).to eq token
45
- end
46
- end
3
+ RSpec.describe('EscapeSequence parsing') do
4
+ include_examples 'parse', /a\ac/, 1 => [:escape, :bell, EscapeSequence::Bell]
5
+ include_examples 'parse', /a\ec/, 1 => [:escape, :escape, EscapeSequence::AsciiEscape]
6
+ include_examples 'parse', /a\fc/, 1 => [:escape, :form_feed, EscapeSequence::FormFeed]
7
+ include_examples 'parse', /a\nc/, 1 => [:escape, :newline, EscapeSequence::Newline]
8
+ include_examples 'parse', /a\rc/, 1 => [:escape, :carriage, EscapeSequence::Return]
9
+ include_examples 'parse', /a\tc/, 1 => [:escape, :tab, EscapeSequence::Tab]
10
+ include_examples 'parse', /a\vc/, 1 => [:escape, :vertical_tab, EscapeSequence::VerticalTab]
11
+
12
+ # meta character escapes
13
+ include_examples 'parse', /a\.c/, 1 => [:escape, :dot, EscapeSequence::Literal]
14
+ include_examples 'parse', /a\?c/, 1 => [:escape, :zero_or_one, EscapeSequence::Literal]
15
+ include_examples 'parse', /a\*c/, 1 => [:escape, :zero_or_more, EscapeSequence::Literal]
16
+ include_examples 'parse', /a\+c/, 1 => [:escape, :one_or_more, EscapeSequence::Literal]
17
+ include_examples 'parse', /a\|c/, 1 => [:escape, :alternation, EscapeSequence::Literal]
18
+ include_examples 'parse', /a\(c/, 1 => [:escape, :group_open, EscapeSequence::Literal]
19
+ include_examples 'parse', /a\)c/, 1 => [:escape, :group_close, EscapeSequence::Literal]
20
+ include_examples 'parse', /a\{c/, 1 => [:escape, :interval_open, EscapeSequence::Literal]
21
+ include_examples 'parse', /a\}c/, 1 => [:escape, :interval_close, EscapeSequence::Literal]
22
+
23
+ # unicode escapes
24
+ include_examples 'parse', /a\u0640/, 1 => [:escape, :codepoint, EscapeSequence::Codepoint]
25
+ include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
+ include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
+
28
+ # hex escapes
29
+ include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
+
31
+ # octal escapes
32
+ include_examples 'parse', /a\177/n, 1 => [:escape, :octal, EscapeSequence::Octal]
47
33
 
48
34
  specify('parse chars and codepoints') do
49
35
  root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
@@ -65,6 +51,9 @@ RSpec.describe('Escape parsing') do
65
51
 
66
52
  expect(root[5].chars).to eq %w[D E]
67
53
  expect(root[5].codepoints).to eq [68, 69]
54
+
55
+ expect { root[5].char }.to raise_error(/#chars/)
56
+ expect { root[5].codepoint }.to raise_error(/#codepoints/)
68
57
  end
69
58
 
70
59
  specify('parse escape control sequence lower') do
@@ -24,13 +24,34 @@ RSpec.describe('FreeSpace parsing') do
24
24
  expect(root.first.text).to eq 'a b c d'
25
25
  end
26
26
 
27
+ specify('parse single-line free space comments without spaces') do
28
+ regexp = /a#b/x
29
+
30
+ root = RP.parse(regexp)
31
+ expect(root.length).to eq 2
32
+
33
+ expect(root[0]).to be_instance_of(Literal)
34
+ expect(root[1]).to be_instance_of(Comment)
35
+ end
36
+
37
+ specify('parse single-line free space comments with spaces') do
38
+ regexp = /a # b/x
39
+
40
+ root = RP.parse(regexp)
41
+ expect(root.length).to eq 3
42
+
43
+ expect(root[0]).to be_instance_of(Literal)
44
+ expect(root[1]).to be_instance_of(WhiteSpace)
45
+ expect(root[2]).to be_instance_of(Comment)
46
+ end
47
+
27
48
  specify('parse free space comments') do
28
49
  regexp = /
29
50
  a ? # One letter
30
51
  b {2,5} # Another one
31
52
  [c-g] + # A set
32
53
  (h|i|j) | # A group
33
- klm *
54
+ klm#nospace before or after comment hash
34
55
  nop +
35
56
  /x
36
57
 
@@ -51,11 +72,11 @@ RSpec.describe('FreeSpace parsing') do
51
72
 
52
73
  alt_2 = alt.alternatives.last
53
74
  expect(alt_2).to be_instance_of(Alternative)
54
- expect(alt_2.length).to eq 7
75
+ expect(alt_2.length).to eq 8
55
76
 
56
- [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
77
+ [0, 2, 5, 7].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
57
78
 
58
- expect(alt_2[1]).to be_instance_of(Comment)
79
+ [1, 4].each { |i| expect(alt_2[i]).to be_instance_of(Comment) }
59
80
  end
60
81
 
61
82
  specify('parse free space nested comments') do