regexp_parser 1.5.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -0
  3. data/Gemfile +3 -3
  4. data/README.md +14 -6
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +6 -43
  7. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  9. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  10. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  11. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  12. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  13. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  14. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  15. data/lib/regexp_parser/expression/sequence.rb +3 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  17. data/lib/regexp_parser/lexer.rb +4 -25
  18. data/lib/regexp_parser/parser.rb +40 -33
  19. data/lib/regexp_parser/scanner.rb +1208 -1353
  20. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  21. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  22. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  23. data/lib/regexp_parser/scanner/scanner.rl +116 -202
  24. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  25. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  26. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  27. data/lib/regexp_parser/version.rb +1 -1
  28. data/spec/expression/base_spec.rb +14 -0
  29. data/spec/expression/methods/match_length_spec.rb +20 -0
  30. data/spec/expression/methods/match_spec.rb +25 -0
  31. data/spec/expression/methods/tests_spec.rb +2 -0
  32. data/spec/expression/methods/traverse_spec.rb +21 -0
  33. data/spec/expression/options_spec.rb +128 -0
  34. data/spec/expression/root_spec.rb +9 -0
  35. data/spec/expression/sequence_spec.rb +9 -0
  36. data/spec/lexer/conditionals_spec.rb +49 -119
  37. data/spec/lexer/delimiters_spec.rb +68 -0
  38. data/spec/lexer/escapes_spec.rb +8 -32
  39. data/spec/lexer/keep_spec.rb +5 -17
  40. data/spec/lexer/literals_spec.rb +73 -110
  41. data/spec/lexer/nesting_spec.rb +86 -117
  42. data/spec/lexer/refcalls_spec.rb +51 -50
  43. data/spec/parser/all_spec.rb +13 -1
  44. data/spec/parser/anchors_spec.rb +9 -23
  45. data/spec/parser/conditionals_spec.rb +9 -9
  46. data/spec/parser/errors_spec.rb +22 -43
  47. data/spec/parser/escapes_spec.rb +33 -44
  48. data/spec/parser/free_space_spec.rb +25 -4
  49. data/spec/parser/groups_spec.rb +98 -257
  50. data/spec/parser/keep_spec.rb +2 -15
  51. data/spec/parser/options_spec.rb +28 -0
  52. data/spec/parser/posix_classes_spec.rb +5 -24
  53. data/spec/parser/properties_spec.rb +42 -54
  54. data/spec/parser/quantifiers_spec.rb +42 -283
  55. data/spec/parser/refcalls_spec.rb +60 -185
  56. data/spec/parser/set/intersections_spec.rb +17 -17
  57. data/spec/parser/set/ranges_spec.rb +17 -17
  58. data/spec/parser/sets_spec.rb +5 -5
  59. data/spec/parser/types_spec.rb +11 -36
  60. data/spec/scanner/anchors_spec.rb +13 -28
  61. data/spec/scanner/conditionals_spec.rb +121 -173
  62. data/spec/scanner/delimiters_spec.rb +52 -0
  63. data/spec/scanner/errors_spec.rb +64 -87
  64. data/spec/scanner/escapes_spec.rb +53 -50
  65. data/spec/scanner/free_space_spec.rb +102 -165
  66. data/spec/scanner/groups_spec.rb +45 -64
  67. data/spec/scanner/keep_spec.rb +5 -28
  68. data/spec/scanner/literals_spec.rb +45 -81
  69. data/spec/scanner/meta_spec.rb +13 -33
  70. data/spec/scanner/options_spec.rb +36 -0
  71. data/spec/scanner/properties_spec.rb +43 -286
  72. data/spec/scanner/quantifiers_spec.rb +13 -28
  73. data/spec/scanner/refcalls_spec.rb +32 -48
  74. data/spec/scanner/sets_spec.rb +88 -102
  75. data/spec/scanner/types_spec.rb +10 -25
  76. data/spec/spec_helper.rb +1 -0
  77. data/spec/support/shared_examples.rb +77 -0
  78. data/spec/syntax/syntax_spec.rb +4 -0
  79. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  80. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  81. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  82. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  83. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  84. data/spec/syntax/versions/aliases_spec.rb +27 -109
  85. metadata +28 -10
  86. data/spec/scanner/scripts_spec.rb +0 -49
  87. data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end
@@ -1,90 +1,67 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Scanning errors') do
4
- specify('scanner unbalanced set') do
5
- expect { RS.scan('[[:alpha:]') }.to raise_error(RS::PrematureEndError)
6
- end
7
-
8
- specify('scanner unbalanced group') do
9
- expect { RS.scan('(abc') }.to raise_error(RS::PrematureEndError)
10
- end
11
-
12
- specify('scanner unbalanced interval') do
13
- expect { RS.scan('a{1,2') }.to raise_error(RS::PrematureEndError)
14
- end
15
-
16
- specify('scanner eof in property') do
17
- expect { RS.scan('\\p{asci') }.to raise_error(RS::PrematureEndError)
18
- end
19
-
20
- specify('scanner incomplete property') do
21
- expect { RS.scan('\\p{ascii abc') }.to raise_error(RS::PrematureEndError)
22
- end
23
-
24
- specify('scanner unknown property') do
25
- expect { RS.scan('\\p{foobar}') }.to raise_error(RS::UnknownUnicodePropertyError)
26
- end
27
-
28
- specify('scanner incomplete options') do
29
- expect { RS.scan('(?mix abc)') }.to raise_error(RS::ScannerError)
30
- end
31
-
32
- specify('scanner eof options') do
33
- expect { RS.scan('(?mix') }.to raise_error(RS::PrematureEndError)
34
- end
35
-
36
- specify('scanner incorrect options') do
37
- expect { RS.scan('(?mix^bc') }.to raise_error(RS::ScannerError)
38
- end
39
-
40
- specify('scanner eof escape') do
41
- expect { RS.scan('\\') }.to raise_error(RS::PrematureEndError)
42
- end
43
-
44
- specify('scanner eof in hex escape') do
45
- expect { RS.scan('\\x') }.to raise_error(RS::PrematureEndError)
46
- end
47
-
48
- specify('scanner eof in codepoint escape') do
49
- expect { RS.scan('\\u') }.to raise_error(RS::PrematureEndError)
50
- expect { RS.scan('\\u0') }.to raise_error(RS::PrematureEndError)
51
- expect { RS.scan('\\u00') }.to raise_error(RS::PrematureEndError)
52
- expect { RS.scan('\\u000') }.to raise_error(RS::PrematureEndError)
53
- expect { RS.scan('\\u{') }.to raise_error(RS::PrematureEndError)
54
- expect { RS.scan('\\u{00') }.to raise_error(RS::PrematureEndError)
55
- expect { RS.scan('\\u{0000') }.to raise_error(RS::PrematureEndError)
56
- expect { RS.scan('\\u{0000 ') }.to raise_error(RS::PrematureEndError)
57
- expect { RS.scan('\\u{0000 0000') }.to raise_error(RS::PrematureEndError)
58
- end
59
-
60
- specify('scanner eof in control sequence') do
61
- expect { RS.scan('\\c') }.to raise_error(RS::PrematureEndError)
62
- expect { RS.scan('\\c\\M') }.to raise_error(RS::PrematureEndError)
63
- expect { RS.scan('\\c\\M-') }.to raise_error(RS::PrematureEndError)
64
- expect { RS.scan('\\C') }.to raise_error(RS::PrematureEndError)
65
- expect { RS.scan('\\C-') }.to raise_error(RS::PrematureEndError)
66
- expect { RS.scan('\\C-\\M') }.to raise_error(RS::PrematureEndError)
67
- expect { RS.scan('\\C-\\M-') }.to raise_error(RS::PrematureEndError)
68
- end
69
-
70
- specify('scanner eof in meta sequence') do
71
- expect { RS.scan('\\M') }.to raise_error(RS::PrematureEndError)
72
- expect { RS.scan('\\M-') }.to raise_error(RS::PrematureEndError)
73
- expect { RS.scan('\\M-\\') }.to raise_error(RS::PrematureEndError)
74
- expect { RS.scan('\\M-\\c') }.to raise_error(RS::PrematureEndError)
75
- expect { RS.scan('\\M-\\C') }.to raise_error(RS::PrematureEndError)
76
- expect { RS.scan('\\M-\\C-') }.to raise_error(RS::PrematureEndError)
77
- end
78
-
79
- specify('scanner invalid hex escape') do
80
- expect { RS.scan('\\xZ') }.to raise_error(RS::InvalidSequenceError)
81
- expect { RS.scan('\\xZ0') }.to raise_error(RS::InvalidSequenceError)
82
- end
83
-
84
- specify('scanner invalid named group') do
85
- expect { RS.scan("(?'')") }.to raise_error(RS::InvalidGroupError)
86
- expect { RS.scan("(?''empty-name)") }.to raise_error(RS::InvalidGroupError)
87
- expect { RS.scan('(?<>)') }.to raise_error(RS::InvalidGroupError)
88
- expect { RS.scan('(?<>empty-name)') }.to raise_error(RS::InvalidGroupError)
89
- end
3
+ RSpec.describe(Regexp::Scanner) do
4
+ RSpec.shared_examples 'scan error' do |error, issue, source|
5
+ it "raises #{error} for #{issue} `#{source}`" do
6
+ expect { RS.scan(source) }.to raise_error(error)
7
+ end
8
+ end
9
+
10
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
+ include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
14
+ include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
15
+ include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
16
+ include_examples 'scan error', RS::PrematureEndError, 'eof escape', '\\'
17
+ include_examples 'scan error', RS::PrematureEndError, 'eof in hex escape', '\x'
18
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u'
19
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u0'
20
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u00'
21
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u000'
22
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{'
23
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{00'
24
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000'
25
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 '
26
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 0000'
27
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c'
28
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M'
29
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M-'
30
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C'
31
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-'
32
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M'
33
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M-'
34
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M'
35
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-'
36
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\\'
37
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\c'
38
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C'
39
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C-'
40
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ'
41
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ0'
42
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\cü'
43
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\c\M-ü'
44
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-ü'
45
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-\M-ü'
46
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-ü'
47
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\cü'
48
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\C-ü'
49
+ include_examples 'scan error', RS::ScannerError, 'invalid c-seq', '\Ca'
50
+ include_examples 'scan error', RS::ScannerError, 'invalid m-seq', '\Ma'
51
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?'')"
52
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?''empty-name)"
53
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>)'
54
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>empty-name)'
55
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?foo)'
56
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix abc)'
57
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix^bc'
58
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?)'
59
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
60
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
61
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
62
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
63
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
64
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
65
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
66
+ include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
90
67
  end
@@ -1,54 +1,57 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Escape scanning') do
4
- tests = {
5
- /c\at/ => [1, :escape, :bell, '\a', 1, 3],
6
-
7
- # not an escape outside a character set
8
- /c\bt/ => [1, :anchor, :word_boundary, '\b', 1, 3],
9
-
10
- /c\ft/ => [1, :escape, :form_feed, '\f', 1, 3],
11
- /c\nt/ => [1, :escape, :newline, '\n', 1, 3],
12
- /c\tt/ => [1, :escape, :tab, '\t', 1, 3],
13
- /c\vt/ => [1, :escape, :vertical_tab, '\v', 1, 3],
14
-
15
- 'c\qt' => [1, :escape, :literal, '\q', 1, 3],
16
-
17
- 'a\012c' => [1, :escape, :octal, '\012', 1, 5],
18
- 'a\0124' => [1, :escape, :octal, '\012', 1, 5],
19
- '\712+7' => [0, :escape, :octal, '\712', 0, 4],
20
-
21
- 'a\x24c' => [1, :escape, :hex, '\x24', 1, 5],
22
- 'a\x0640c' => [1, :escape, :hex, '\x06', 1, 5],
23
-
24
- 'a\u0640c' => [1, :escape, :codepoint, '\u0640', 1, 7],
25
- 'a\u{640 0641}c' => [1, :escape, :codepoint_list, '\u{640 0641}', 1, 13],
26
- 'a\u{10FFFF}c' => [1, :escape, :codepoint_list, '\u{10FFFF}', 1, 11],
27
-
28
- /a\cBc/ => [1, :escape, :control, '\cB', 1, 4],
29
- /a\C-bc/ => [1, :escape, :control, '\C-b', 1, 5],
30
- /a\c\M-Bc/n => [1, :escape, :control, '\c\M-B', 1, 7],
31
- /a\C-\M-Bc/n => [1, :escape, :control, '\C-\M-B', 1, 8],
32
-
33
- /a\M-Bc/n => [1, :escape, :meta_sequence, '\M-B', 1, 5],
34
- /a\M-\C-Bc/n => [1, :escape, :meta_sequence, '\M-\C-B', 1, 8],
35
- /a\M-\cBc/n => [1, :escape, :meta_sequence, '\M-\cB', 1, 7],
36
-
37
- 'ab\\\xcd' => [1, :escape, :backslash, '\\\\', 2, 4],
38
- 'ab\\\0cd' => [1, :escape, :backslash, '\\\\', 2, 4],
39
- 'ab\\\Kcd' => [1, :escape, :backslash, '\\\\', 2, 4],
40
- }
41
-
42
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
43
- specify("scanner_#{type}_#{token}_#{count}") do
44
- tokens = RS.scan(pattern)
45
- result = tokens.at(index)
46
-
47
- expect(result[0]).to eq type
48
- expect(result[1]).to eq token
49
- expect(result[2]).to eq text
50
- expect(result[3]).to eq ts
51
- expect(result[4]).to eq te
52
- end
53
- end
4
+ include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
+
6
+ # not an escape outside a character set
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
+
9
+ include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
+ include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
11
+ include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
+ include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
+
14
+ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
15
+
16
+ # these incomplete ref/call sequences are treated as literal escapes by Ruby
17
+ include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
18
+ include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
19
+
20
+ include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
21
+ include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
22
+ include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
23
+
24
+ include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
25
+ include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
26
+
27
+ include_examples 'scan', 'a\u0640c', 1 => [:escape, :codepoint, '\u0640', 1, 7]
28
+ include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
29
+ include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
30
+
31
+ include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
32
+ include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
33
+ include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
34
+ include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
35
+ include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
36
+ include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
37
+ include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
38
+ include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
39
+ include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
40
+ include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
41
+
42
+ include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
43
+ include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
44
+ include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
45
+ include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
46
+ include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
47
+ include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
48
+ include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
49
+
50
+ include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
51
+ include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
52
+ include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
53
+
54
+ include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
55
+ include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
56
+ include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
54
57
  end
@@ -1,196 +1,133 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('FreeSpace scanning') do
4
- specify('scan free space tokens') do
5
- regexp = /
4
+ describe('scan free space tokens') do
5
+ let(:tokens) { RS.scan(/
6
6
  a
7
7
  b ? c *
8
8
  d {2,3}
9
9
  e + | f +
10
- /x
10
+ /x) }
11
11
 
12
- tokens = RS.scan(regexp)
13
-
14
- 0.upto(24) do |i|
15
- if i.even?
12
+ 0.upto(24).select(&:even?).each do |i|
13
+ it "scans #{i} as free space" do
16
14
  expect(tokens[i][0]).to eq :free_space
17
15
  expect(tokens[i][1]).to eq :whitespace
18
- else
16
+ end
17
+ end
18
+ 0.upto(24).reject(&:even?).each do |i|
19
+ it "does not scan #{i} as free space" do
19
20
  expect(tokens[i][0]).not_to eq :free_space
20
21
  expect(tokens[i][1]).not_to eq :whitespace
21
22
  end
22
23
  end
23
24
 
24
- [0, 2, 10, 14].each { |i| expect(tokens[i][2]).to eq "\n " }
25
-
26
- [4, 6, 8, 12].each { |i| expect(tokens[i][2]).to eq ' ' }
25
+ it 'sets the correct text' do
26
+ [0, 2, 10, 14].each { |i| expect(tokens[i][2]).to eq "\n " }
27
+ [4, 6, 8, 12].each { |i| expect(tokens[i][2]).to eq ' ' }
28
+ end
27
29
  end
28
30
 
29
- specify('scan free space comments') do
30
- regexp = /
31
+ describe('scan free space comments') do
32
+ include_examples 'scan', /
31
33
  a + # A + comment
32
34
  b ? # B ? comment
33
35
  c {2,3} # C {2,3} comment
34
36
  d + | e + # D|E comment
35
- /x
36
-
37
- tokens = RS.scan(regexp)
38
-
39
- [
40
- [ 5, :free_space, :comment, "# A + comment\n", 11, 25],
41
- [11, :free_space, :comment, "# B ? comment\n", 37, 51],
42
- [17, :free_space, :comment, "# C {2,3} comment\n", 66, 84],
43
- [29, :free_space, :comment, "# D|E comment\n", 100, 114],
44
- ].each do |index, type, token, text, ts, te|
45
- result = tokens[index]
46
-
47
- expect(result[0]).to eq type
48
- expect(result[1]).to eq token
49
- expect(result[2]).to eq text
50
- expect(result[3]).to eq ts
51
- expect(result[4]).to eq te
52
- end
37
+ /x,
38
+ 5 => [:free_space, :comment, "# A + comment\n", 11, 25],
39
+ 11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
+ 17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
+ 29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
53
42
  end
54
43
 
55
- specify('scan free space inlined') do
56
- regexp = /a b(?x:c d e)f g/
57
-
58
- tokens = RS.scan(regexp)
59
-
60
- [
61
- [0, :literal, :literal, 'a b', 0, 3],
62
- [1, :group, :options, '(?x:', 3, 7],
63
- [2, :literal, :literal, 'c', 7, 8],
64
- [3, :free_space, :whitespace, ' ', 8, 9],
65
- [4, :literal, :literal, 'd', 9, 10],
66
- [5, :free_space, :whitespace, ' ', 10, 11],
67
- [6, :literal, :literal, 'e', 11, 12],
68
- [7, :group, :close, ')', 12, 13],
69
- [8, :literal, :literal, 'f g', 13, 16]
70
- ].each do |index, type, token, text, ts, te|
71
- result = tokens[index]
72
-
73
- expect(result[0]).to eq type
74
- expect(result[1]).to eq token
75
- expect(result[2]).to eq text
76
- expect(result[3]).to eq ts
77
- expect(result[4]).to eq te
78
- end
44
+ describe('scan free space inlined') do
45
+ include_examples 'scan', /a b(?x:c d e)f g/,
46
+ 0 => [:literal, :literal, 'a b', 0, 3],
47
+ 1 => [:group, :options, '(?x:', 3, 7],
48
+ 2 => [:literal, :literal, 'c', 7, 8],
49
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
50
+ 4 => [:literal, :literal, 'd', 9, 10],
51
+ 5 => [:free_space, :whitespace, ' ', 10, 11],
52
+ 6 => [:literal, :literal, 'e', 11, 12],
53
+ 7 => [:group, :close, ')', 12, 13],
54
+ 8 => [:literal, :literal, 'f g', 13, 16]
79
55
  end
80
56
 
81
- specify('scan free space nested') do
82
- regexp = /a b(?x:c d(?-x:e f)g h)i j/
83
-
84
- tokens = RS.scan(regexp)
85
-
86
- [
87
- [ 0, :literal, :literal, 'a b', 0, 3],
88
- [ 1, :group, :options, '(?x:', 3, 7],
89
- [ 2, :literal, :literal, 'c', 7, 8],
90
- [ 3, :free_space, :whitespace, ' ', 8, 9],
91
- [ 4, :literal, :literal, 'd', 9, 10],
92
- [ 5, :group, :options, '(?-x:', 10, 15],
93
- [ 6, :literal, :literal, 'e f', 15, 18],
94
- [ 7, :group, :close, ')', 18, 19],
95
- [ 8, :literal, :literal, 'g', 19, 20],
96
- [ 9, :free_space, :whitespace, ' ', 20, 21],
97
- [10, :literal, :literal, 'h', 21, 22],
98
- [11, :group, :close, ')', 22, 23],
99
- [12, :literal, :literal, 'i j', 23, 26]
100
- ].each do |index, type, token, text, ts, te|
101
- result = tokens[index]
102
-
103
- expect(result[0]).to eq type
104
- expect(result[1]).to eq token
105
- expect(result[2]).to eq text
106
- expect(result[3]).to eq ts
107
- expect(result[4]).to eq te
108
- end
57
+ describe('scan free space nested') do
58
+ include_examples 'scan', /a b(?x:c d(?-x:e f)g h)i j/,
59
+ 0 => [:literal, :literal, 'a b', 0, 3],
60
+ 1 => [:group, :options, '(?x:', 3, 7],
61
+ 2 => [:literal, :literal, 'c', 7, 8],
62
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
63
+ 4 => [:literal, :literal, 'd', 9, 10],
64
+ 5 => [:group, :options, '(?-x:', 10, 15],
65
+ 6 => [:literal, :literal, 'e f', 15, 18],
66
+ 7 => [:group, :close, ')', 18, 19],
67
+ 8 => [:literal, :literal, 'g', 19, 20],
68
+ 9 => [:free_space, :whitespace, ' ', 20, 21],
69
+ 10 => [:literal, :literal, 'h', 21, 22],
70
+ 11 => [:group, :close, ')', 22, 23],
71
+ 12 => [:literal, :literal, 'i j', 23, 26]
109
72
  end
110
73
 
111
- specify('scan free space nested groups') do
112
- regexp = /(a (b(?x: (c d) (?-x:(e f) )g) h)i j)/
113
-
114
- tokens = RS.scan(regexp)
115
-
116
- [
117
- [ 0, :group, :capture, '(', 0, 1],
118
- [ 1, :literal, :literal, 'a ', 1, 3],
119
- [ 2, :group, :capture, '(', 3, 4],
120
- [ 3, :literal, :literal, 'b', 4, 5],
121
- [ 4, :group, :options, '(?x:', 5, 9],
122
- [ 5, :free_space, :whitespace, ' ', 9, 10],
123
- [ 6, :group, :capture, '(', 10, 11],
124
- [ 7, :literal, :literal, 'c', 11, 12],
125
- [ 8, :free_space, :whitespace, ' ', 12, 13],
126
- [ 9, :literal, :literal, 'd', 13, 14],
127
- [10, :group, :close, ')', 14, 15],
128
- [11, :free_space, :whitespace, ' ', 15, 16],
129
- [12, :group, :options, '(?-x:', 16, 21],
130
- [13, :group, :capture, '(', 21, 22],
131
- [14, :literal, :literal, 'e f', 22, 25],
132
- [15, :group, :close, ')', 25, 26],
133
- [16, :literal, :literal, ' ', 26, 27],
134
- [17, :group, :close, ')', 27, 28],
135
- [18, :literal, :literal, 'g', 28, 29],
136
- [19, :group, :close, ')', 29, 30],
137
- [20, :literal, :literal, ' h', 30, 32],
138
- [21, :group, :close, ')', 32, 33],
139
- [22, :literal, :literal, 'i j', 33, 36],
140
- [23, :group, :close, ')', 36, 37]
141
- ].each do |index, type, token, text, ts, te|
142
- result = tokens[index]
143
-
144
- expect(result[0]).to eq type
145
- expect(result[1]).to eq token
146
- expect(result[2]).to eq text
147
- expect(result[3]).to eq ts
148
- expect(result[4]).to eq te
149
- end
74
+ describe('scan free space nested groups') do
75
+ include_examples 'scan', /(a (b(?x: (c d) (?-x:(e f) )g) h)i j)/,
76
+ 0 => [:group, :capture, '(', 0, 1],
77
+ 1 => [:literal, :literal, 'a ', 1, 3],
78
+ 2 => [:group, :capture, '(', 3, 4],
79
+ 3 => [:literal, :literal, 'b', 4, 5],
80
+ 4 => [:group, :options, '(?x:', 5, 9],
81
+ 5 => [:free_space, :whitespace, ' ', 9, 10],
82
+ 6 => [:group, :capture, '(', 10, 11],
83
+ 7 => [:literal, :literal, 'c', 11, 12],
84
+ 8 => [:free_space, :whitespace, ' ', 12, 13],
85
+ 9 => [:literal, :literal, 'd', 13, 14],
86
+ 10 => [:group, :close, ')', 14, 15],
87
+ 11 => [:free_space, :whitespace, ' ', 15, 16],
88
+ 12 => [:group, :options, '(?-x:', 16, 21],
89
+ 13 => [:group, :capture, '(', 21, 22],
90
+ 14 => [:literal, :literal, 'e f', 22, 25],
91
+ 15 => [:group, :close, ')', 25, 26],
92
+ 16 => [:literal, :literal, ' ', 26, 27],
93
+ 17 => [:group, :close, ')', 27, 28],
94
+ 18 => [:literal, :literal, 'g', 28, 29],
95
+ 19 => [:group, :close, ')', 29, 30],
96
+ 20 => [:literal, :literal, ' h', 30, 32],
97
+ 21 => [:group, :close, ')', 32, 33],
98
+ 22 => [:literal, :literal, 'i j', 33, 36],
99
+ 23 => [:group, :close, ')', 36, 37]
150
100
  end
151
101
 
152
- specify('scan free space switch groups') do
153
- regexp = /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/
154
-
155
- tokens = RS.scan(regexp)
156
-
157
- [
158
- [ 0, :group, :capture, '(', 0, 1],
159
- [ 1, :literal, :literal, 'a ', 1, 3],
160
- [ 2, :group, :capture, '(', 3, 4],
161
- [ 3, :literal, :literal, 'b', 4, 5],
162
- [ 4, :group, :capture, '(', 5, 6],
163
- [ 5, :group, :options_switch, '(?x', 6, 9],
164
- [ 6, :group, :close, ')', 9, 10],
165
- [ 7, :free_space, :whitespace, ' ', 10, 11],
166
- [ 8, :group, :capture, '(', 11, 12],
167
- [ 9, :literal, :literal, 'c', 12, 13],
168
- [10, :free_space, :whitespace, ' ', 13, 14],
169
- [11, :literal, :literal, 'd', 14, 15],
170
- [12, :group, :close, ')', 15, 16],
171
- [13, :free_space, :whitespace, ' ', 16, 17],
172
- [14, :group, :capture, '(', 17, 18],
173
- [15, :group, :options_switch, '(?-x', 18, 22],
174
- [16, :group, :close, ')', 22, 23],
175
- [17, :group, :capture, '(', 23, 24],
176
- [18, :literal, :literal, 'e f', 24, 27],
177
- [19, :group, :close, ')', 27, 28],
178
- [20, :literal, :literal, ' ', 28, 29],
179
- [21, :group, :close, ')', 29, 30],
180
- [22, :literal, :literal, 'g', 30, 31],
181
- [23, :group, :close, ')', 31, 32],
182
- [24, :literal, :literal, ' h', 32, 34],
183
- [25, :group, :close, ')', 34, 35],
184
- [26, :literal, :literal, 'i j', 35, 38],
185
- [27, :group, :close, ')', 38, 39]
186
- ].each do |index, type, token, text, ts, te|
187
- result = tokens[index]
188
-
189
- expect(result[0]).to eq type
190
- expect(result[1]).to eq token
191
- expect(result[2]).to eq text
192
- expect(result[3]).to eq ts
193
- expect(result[4]).to eq te
194
- end
102
+ describe('scan free space switch groups') do
103
+ include_examples 'scan', /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/,
104
+ 0 => [:group, :capture, '(', 0, 1],
105
+ 1 => [:literal, :literal, 'a ', 1, 3],
106
+ 2 => [:group, :capture, '(', 3, 4],
107
+ 3 => [:literal, :literal, 'b', 4, 5],
108
+ 4 => [:group, :capture, '(', 5, 6],
109
+ 5 => [:group, :options_switch, '(?x', 6, 9],
110
+ 6 => [:group, :close, ')', 9, 10],
111
+ 7 => [:free_space, :whitespace, ' ', 10, 11],
112
+ 8 => [:group, :capture, '(', 11, 12],
113
+ 9 => [:literal, :literal, 'c', 12, 13],
114
+ 10 => [:free_space, :whitespace, ' ', 13, 14],
115
+ 11 => [:literal, :literal, 'd', 14, 15],
116
+ 12 => [:group, :close, ')', 15, 16],
117
+ 13 => [:free_space, :whitespace, ' ', 16, 17],
118
+ 14 => [:group, :capture, '(', 17, 18],
119
+ 15 => [:group, :options_switch, '(?-x', 18, 22],
120
+ 16 => [:group, :close, ')', 22, 23],
121
+ 17 => [:group, :capture, '(', 23, 24],
122
+ 18 => [:literal, :literal, 'e f', 24, 27],
123
+ 19 => [:group, :close, ')', 27, 28],
124
+ 20 => [:literal, :literal, ' ', 28, 29],
125
+ 21 => [:group, :close, ')', 29, 30],
126
+ 22 => [:literal, :literal, 'g', 30, 31],
127
+ 23 => [:group, :close, ')', 31, 32],
128
+ 24 => [:literal, :literal, ' h', 32, 34],
129
+ 25 => [:group, :close, ')', 34, 35],
130
+ 26 => [:literal, :literal, 'i j', 35, 38],
131
+ 27 => [:group, :close, ')', 38, 39]
195
132
  end
196
133
  end