regexp_parser 1.3.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +53 -1
  3. data/Gemfile +3 -3
  4. data/README.md +10 -14
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  18. data/lib/regexp_parser/expression/sequence.rb +3 -6
  19. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  20. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  21. data/lib/regexp_parser/lexer.rb +30 -44
  22. data/lib/regexp_parser/parser.rb +47 -24
  23. data/lib/regexp_parser/scanner.rb +1159 -1329
  24. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  25. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  26. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  27. data/lib/regexp_parser/scanner/scanner.rl +82 -190
  28. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  29. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  30. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +3 -3
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +154 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +140 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/escapes_spec.rb +14 -0
  53. data/spec/lexer/keep_spec.rb +10 -0
  54. data/spec/lexer/literals_spec.rb +89 -0
  55. data/spec/lexer/nesting_spec.rb +99 -0
  56. data/spec/lexer/refcalls_spec.rb +55 -0
  57. data/spec/parser/all_spec.rb +43 -0
  58. data/spec/parser/alternation_spec.rb +88 -0
  59. data/spec/parser/anchors_spec.rb +17 -0
  60. data/spec/parser/conditionals_spec.rb +179 -0
  61. data/spec/parser/errors_spec.rb +30 -0
  62. data/spec/parser/escapes_spec.rb +121 -0
  63. data/spec/parser/free_space_spec.rb +130 -0
  64. data/spec/parser/groups_spec.rb +108 -0
  65. data/spec/parser/keep_spec.rb +6 -0
  66. data/spec/parser/posix_classes_spec.rb +8 -0
  67. data/spec/parser/properties_spec.rb +115 -0
  68. data/spec/parser/quantifiers_spec.rb +51 -0
  69. data/spec/parser/refcalls_spec.rb +112 -0
  70. data/spec/parser/set/intersections_spec.rb +127 -0
  71. data/spec/parser/set/ranges_spec.rb +111 -0
  72. data/spec/parser/sets_spec.rb +178 -0
  73. data/spec/parser/types_spec.rb +18 -0
  74. data/spec/scanner/all_spec.rb +18 -0
  75. data/spec/scanner/anchors_spec.rb +21 -0
  76. data/spec/scanner/conditionals_spec.rb +128 -0
  77. data/spec/scanner/errors_spec.rb +68 -0
  78. data/spec/scanner/escapes_spec.rb +53 -0
  79. data/spec/scanner/free_space_spec.rb +133 -0
  80. data/spec/scanner/groups_spec.rb +52 -0
  81. data/spec/scanner/keep_spec.rb +10 -0
  82. data/spec/scanner/literals_spec.rb +49 -0
  83. data/spec/scanner/meta_spec.rb +18 -0
  84. data/spec/scanner/properties_spec.rb +64 -0
  85. data/spec/scanner/quantifiers_spec.rb +20 -0
  86. data/spec/scanner/refcalls_spec.rb +36 -0
  87. data/spec/scanner/sets_spec.rb +102 -0
  88. data/spec/scanner/types_spec.rb +14 -0
  89. data/spec/spec_helper.rb +15 -0
  90. data/{test → spec}/support/runner.rb +9 -8
  91. data/spec/support/shared_examples.rb +77 -0
  92. data/{test → spec}/support/warning_extractor.rb +5 -7
  93. data/spec/syntax/syntax_spec.rb +48 -0
  94. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  95. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  96. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  97. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  98. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  99. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  100. data/spec/syntax/versions/aliases_spec.rb +37 -0
  101. data/spec/token/token_spec.rb +85 -0
  102. metadata +144 -143
  103. data/test/expression/test_all.rb +0 -12
  104. data/test/expression/test_base.rb +0 -90
  105. data/test/expression/test_clone.rb +0 -89
  106. data/test/expression/test_conditionals.rb +0 -113
  107. data/test/expression/test_free_space.rb +0 -35
  108. data/test/expression/test_set.rb +0 -84
  109. data/test/expression/test_strfregexp.rb +0 -230
  110. data/test/expression/test_subexpression.rb +0 -58
  111. data/test/expression/test_tests.rb +0 -99
  112. data/test/expression/test_to_h.rb +0 -59
  113. data/test/expression/test_to_s.rb +0 -104
  114. data/test/expression/test_traverse.rb +0 -161
  115. data/test/helpers.rb +0 -10
  116. data/test/lexer/test_all.rb +0 -41
  117. data/test/lexer/test_conditionals.rb +0 -127
  118. data/test/lexer/test_keep.rb +0 -24
  119. data/test/lexer/test_literals.rb +0 -130
  120. data/test/lexer/test_nesting.rb +0 -132
  121. data/test/lexer/test_refcalls.rb +0 -56
  122. data/test/parser/set/test_intersections.rb +0 -127
  123. data/test/parser/set/test_ranges.rb +0 -111
  124. data/test/parser/test_all.rb +0 -64
  125. data/test/parser/test_alternation.rb +0 -92
  126. data/test/parser/test_anchors.rb +0 -34
  127. data/test/parser/test_conditionals.rb +0 -187
  128. data/test/parser/test_errors.rb +0 -63
  129. data/test/parser/test_escapes.rb +0 -134
  130. data/test/parser/test_free_space.rb +0 -139
  131. data/test/parser/test_groups.rb +0 -289
  132. data/test/parser/test_keep.rb +0 -21
  133. data/test/parser/test_posix_classes.rb +0 -27
  134. data/test/parser/test_properties.rb +0 -133
  135. data/test/parser/test_quantifiers.rb +0 -301
  136. data/test/parser/test_refcalls.rb +0 -186
  137. data/test/parser/test_sets.rb +0 -179
  138. data/test/parser/test_types.rb +0 -50
  139. data/test/scanner/test_all.rb +0 -38
  140. data/test/scanner/test_anchors.rb +0 -38
  141. data/test/scanner/test_conditionals.rb +0 -184
  142. data/test/scanner/test_errors.rb +0 -91
  143. data/test/scanner/test_escapes.rb +0 -56
  144. data/test/scanner/test_free_space.rb +0 -200
  145. data/test/scanner/test_groups.rb +0 -79
  146. data/test/scanner/test_keep.rb +0 -35
  147. data/test/scanner/test_literals.rb +0 -89
  148. data/test/scanner/test_meta.rb +0 -40
  149. data/test/scanner/test_properties.rb +0 -312
  150. data/test/scanner/test_quantifiers.rb +0 -37
  151. data/test/scanner/test_refcalls.rb +0 -52
  152. data/test/scanner/test_scripts.rb +0 -53
  153. data/test/scanner/test_sets.rb +0 -119
  154. data/test/scanner/test_types.rb +0 -35
  155. data/test/scanner/test_unicode_blocks.rb +0 -30
  156. data/test/support/disable_autotest.rb +0 -8
  157. data/test/syntax/test_all.rb +0 -6
  158. data/test/syntax/test_syntax.rb +0 -61
  159. data/test/syntax/test_syntax_token_map.rb +0 -25
  160. data/test/syntax/versions/test_1.8.rb +0 -55
  161. data/test/syntax/versions/test_1.9.1.rb +0 -36
  162. data/test/syntax/versions/test_1.9.3.rb +0 -32
  163. data/test/syntax/versions/test_2.0.0.rb +0 -37
  164. data/test/syntax/versions/test_2.2.0.rb +0 -32
  165. data/test/syntax/versions/test_aliases.rb +0 -129
  166. data/test/syntax/versions/test_all.rb +0 -5
  167. data/test/test_all.rb +0 -5
  168. data/test/token/test_all.rb +0 -2
  169. data/test/token/test_token.rb +0 -107
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Scanner) do
4
+ RSpec.shared_examples 'scan error' do |error, issue, source|
5
+ it "raises #{error} for #{issue} `#{source}`" do
6
+ expect { RS.scan(source) }.to raise_error(error)
7
+ end
8
+ end
9
+
10
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced interval', 'a{1,2'
14
+ include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
15
+ include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
16
+ include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
17
+ include_examples 'scan error', RS::PrematureEndError, 'eof escape', '\\'
18
+ include_examples 'scan error', RS::PrematureEndError, 'eof in hex escape', '\x'
19
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u'
20
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u0'
21
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u00'
22
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u000'
23
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{'
24
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{00'
25
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000'
26
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 '
27
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 0000'
28
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c'
29
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M'
30
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M-'
31
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C'
32
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-'
33
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M'
34
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M-'
35
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M'
36
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-'
37
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\\'
38
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\c'
39
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C'
40
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C-'
41
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ'
42
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ0'
43
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\cü'
44
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\c\M-ü'
45
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-ü'
46
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-\M-ü'
47
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-ü'
48
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\cü'
49
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\C-ü'
50
+ include_examples 'scan error', RS::ScannerError, 'invalid c-seq', '\Ca'
51
+ include_examples 'scan error', RS::ScannerError, 'invalid m-seq', '\Ma'
52
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?'')"
53
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?''empty-name)"
54
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>)'
55
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>empty-name)'
56
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?foo)'
57
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix abc)'
58
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix^bc'
59
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?)'
60
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
61
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
62
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
63
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
64
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
65
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
66
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
67
+ include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
68
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Escape scanning') do
4
+ include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
+
6
+ # not an escape outside a character set
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
+
9
+ include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
+ include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
11
+ include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
+ include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
+
14
+ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
15
+
16
+ include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
+ include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
+ include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
+
20
+ include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
+ include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
+
23
+ include_examples 'scan', 'a\u0640c', 1 => [:escape, :codepoint, '\u0640', 1, 7]
24
+ include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
25
+ include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
26
+
27
+ include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
28
+ include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
29
+ include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
30
+ include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
31
+ include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
32
+ include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
33
+ include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
34
+ include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
35
+ include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
36
+ include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
37
+
38
+ include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
39
+ include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
40
+ include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
41
+ include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
42
+ include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
43
+ include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
44
+ include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
45
+
46
+ include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
47
+ include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
48
+ include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
49
+
50
+ include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
51
+ include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
52
+ include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
53
+ end
@@ -0,0 +1,133 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('FreeSpace scanning') do
4
+ describe('scan free space tokens') do
5
+ let(:tokens) { RS.scan(/
6
+ a
7
+ b ? c *
8
+ d {2,3}
9
+ e + | f +
10
+ /x) }
11
+
12
+ 0.upto(24).select(&:even?).each do |i|
13
+ it "scans #{i} as free space" do
14
+ expect(tokens[i][0]).to eq :free_space
15
+ expect(tokens[i][1]).to eq :whitespace
16
+ end
17
+ end
18
+ 0.upto(24).reject(&:even?).each do |i|
19
+ it "does not scan #{i} as free space" do
20
+ expect(tokens[i][0]).not_to eq :free_space
21
+ expect(tokens[i][1]).not_to eq :whitespace
22
+ end
23
+ end
24
+
25
+ it 'sets the correct text' do
26
+ [0, 2, 10, 14].each { |i| expect(tokens[i][2]).to eq "\n " }
27
+ [4, 6, 8, 12].each { |i| expect(tokens[i][2]).to eq ' ' }
28
+ end
29
+ end
30
+
31
+ describe('scan free space comments') do
32
+ include_examples 'scan', /
33
+ a + # A + comment
34
+ b ? # B ? comment
35
+ c {2,3} # C {2,3} comment
36
+ d + | e + # D|E comment
37
+ /x,
38
+ 5 => [:free_space, :comment, "# A + comment\n", 11, 25],
39
+ 11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
+ 17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
+ 29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+ end
43
+
44
+ describe('scan free space inlined') do
45
+ include_examples 'scan', /a b(?x:c d e)f g/,
46
+ 0 => [:literal, :literal, 'a b', 0, 3],
47
+ 1 => [:group, :options, '(?x:', 3, 7],
48
+ 2 => [:literal, :literal, 'c', 7, 8],
49
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
50
+ 4 => [:literal, :literal, 'd', 9, 10],
51
+ 5 => [:free_space, :whitespace, ' ', 10, 11],
52
+ 6 => [:literal, :literal, 'e', 11, 12],
53
+ 7 => [:group, :close, ')', 12, 13],
54
+ 8 => [:literal, :literal, 'f g', 13, 16]
55
+ end
56
+
57
+ describe('scan free space nested') do
58
+ include_examples 'scan', /a b(?x:c d(?-x:e f)g h)i j/,
59
+ 0 => [:literal, :literal, 'a b', 0, 3],
60
+ 1 => [:group, :options, '(?x:', 3, 7],
61
+ 2 => [:literal, :literal, 'c', 7, 8],
62
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
63
+ 4 => [:literal, :literal, 'd', 9, 10],
64
+ 5 => [:group, :options, '(?-x:', 10, 15],
65
+ 6 => [:literal, :literal, 'e f', 15, 18],
66
+ 7 => [:group, :close, ')', 18, 19],
67
+ 8 => [:literal, :literal, 'g', 19, 20],
68
+ 9 => [:free_space, :whitespace, ' ', 20, 21],
69
+ 10 => [:literal, :literal, 'h', 21, 22],
70
+ 11 => [:group, :close, ')', 22, 23],
71
+ 12 => [:literal, :literal, 'i j', 23, 26]
72
+ end
73
+
74
+ describe('scan free space nested groups') do
75
+ include_examples 'scan', /(a (b(?x: (c d) (?-x:(e f) )g) h)i j)/,
76
+ 0 => [:group, :capture, '(', 0, 1],
77
+ 1 => [:literal, :literal, 'a ', 1, 3],
78
+ 2 => [:group, :capture, '(', 3, 4],
79
+ 3 => [:literal, :literal, 'b', 4, 5],
80
+ 4 => [:group, :options, '(?x:', 5, 9],
81
+ 5 => [:free_space, :whitespace, ' ', 9, 10],
82
+ 6 => [:group, :capture, '(', 10, 11],
83
+ 7 => [:literal, :literal, 'c', 11, 12],
84
+ 8 => [:free_space, :whitespace, ' ', 12, 13],
85
+ 9 => [:literal, :literal, 'd', 13, 14],
86
+ 10 => [:group, :close, ')', 14, 15],
87
+ 11 => [:free_space, :whitespace, ' ', 15, 16],
88
+ 12 => [:group, :options, '(?-x:', 16, 21],
89
+ 13 => [:group, :capture, '(', 21, 22],
90
+ 14 => [:literal, :literal, 'e f', 22, 25],
91
+ 15 => [:group, :close, ')', 25, 26],
92
+ 16 => [:literal, :literal, ' ', 26, 27],
93
+ 17 => [:group, :close, ')', 27, 28],
94
+ 18 => [:literal, :literal, 'g', 28, 29],
95
+ 19 => [:group, :close, ')', 29, 30],
96
+ 20 => [:literal, :literal, ' h', 30, 32],
97
+ 21 => [:group, :close, ')', 32, 33],
98
+ 22 => [:literal, :literal, 'i j', 33, 36],
99
+ 23 => [:group, :close, ')', 36, 37]
100
+ end
101
+
102
+ describe('scan free space switch groups') do
103
+ include_examples 'scan', /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/,
104
+ 0 => [:group, :capture, '(', 0, 1],
105
+ 1 => [:literal, :literal, 'a ', 1, 3],
106
+ 2 => [:group, :capture, '(', 3, 4],
107
+ 3 => [:literal, :literal, 'b', 4, 5],
108
+ 4 => [:group, :capture, '(', 5, 6],
109
+ 5 => [:group, :options_switch, '(?x', 6, 9],
110
+ 6 => [:group, :close, ')', 9, 10],
111
+ 7 => [:free_space, :whitespace, ' ', 10, 11],
112
+ 8 => [:group, :capture, '(', 11, 12],
113
+ 9 => [:literal, :literal, 'c', 12, 13],
114
+ 10 => [:free_space, :whitespace, ' ', 13, 14],
115
+ 11 => [:literal, :literal, 'd', 14, 15],
116
+ 12 => [:group, :close, ')', 15, 16],
117
+ 13 => [:free_space, :whitespace, ' ', 16, 17],
118
+ 14 => [:group, :capture, '(', 17, 18],
119
+ 15 => [:group, :options_switch, '(?-x', 18, 22],
120
+ 16 => [:group, :close, ')', 22, 23],
121
+ 17 => [:group, :capture, '(', 23, 24],
122
+ 18 => [:literal, :literal, 'e f', 24, 27],
123
+ 19 => [:group, :close, ')', 27, 28],
124
+ 20 => [:literal, :literal, ' ', 28, 29],
125
+ 21 => [:group, :close, ')', 29, 30],
126
+ 22 => [:literal, :literal, 'g', 30, 31],
127
+ 23 => [:group, :close, ')', 31, 32],
128
+ 24 => [:literal, :literal, ' h', 32, 34],
129
+ 25 => [:group, :close, ')', 34, 35],
130
+ 26 => [:literal, :literal, 'i j', 35, 38],
131
+ 27 => [:group, :close, ')', 38, 39]
132
+ end
133
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Group scanning') do
4
+ # Group types
5
+ include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
+ include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
+
8
+ include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
+ include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
+
11
+ include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
+ include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
13
+
14
+ include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
+ include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
16
+ include_examples 'scan', '(?::)', 0 => [:group, :passive, '(?:', 0, 3]
17
+
18
+ # Comments
19
+ include_examples 'scan', '(?#abc)', 0 => [:group, :comment, '(?#abc)', 0, 7]
20
+ include_examples 'scan', '(?#)', 0 => [:group, :comment, '(?#)', 0, 4]
21
+
22
+ # Assertions
23
+ include_examples 'scan', '(?=abc)', 0 => [:assertion, :lookahead, '(?=', 0, 3]
24
+ include_examples 'scan', '(?!abc)', 0 => [:assertion, :nlookahead, '(?!', 0, 3]
25
+ include_examples 'scan', '(?<=abc)', 0 => [:assertion, :lookbehind, '(?<=', 0, 4]
26
+ include_examples 'scan', '(?<!abc)', 0 => [:assertion, :nlookbehind, '(?<!', 0, 4]
27
+
28
+ # Options
29
+ include_examples 'scan', '(?-mix:abc)', 0 => [:group, :options, '(?-mix:', 0, 7]
30
+ include_examples 'scan', '(?m-ix:abc)', 0 => [:group, :options, '(?m-ix:', 0, 7]
31
+ include_examples 'scan', '(?mi-x:abc)', 0 => [:group, :options, '(?mi-x:', 0, 7]
32
+ include_examples 'scan', '(?mix:abc)', 0 => [:group, :options, '(?mix:', 0, 6]
33
+ include_examples 'scan', '(?m:)', 0 => [:group, :options, '(?m:', 0, 4]
34
+ include_examples 'scan', '(?i:)', 0 => [:group, :options, '(?i:', 0, 4]
35
+ include_examples 'scan', '(?x:)', 0 => [:group, :options, '(?x:', 0, 4]
36
+ include_examples 'scan', '(?mix)', 0 => [:group, :options_switch, '(?mix', 0, 5]
37
+ include_examples 'scan', '(?d-mix:abc)', 0 => [:group, :options, '(?d-mix:', 0, 8]
38
+ include_examples 'scan', '(?a-mix:abc)', 0 => [:group, :options, '(?a-mix:', 0, 8]
39
+ include_examples 'scan', '(?u-mix:abc)', 0 => [:group, :options, '(?u-mix:', 0, 8]
40
+ include_examples 'scan', '(?da-m:abc)', 0 => [:group, :options, '(?da-m:', 0, 7]
41
+ include_examples 'scan', '(?du-x:abc)', 0 => [:group, :options, '(?du-x:', 0, 7]
42
+ include_examples 'scan', '(?dau-i:abc)', 0 => [:group, :options, '(?dau-i:', 0, 8]
43
+ include_examples 'scan', '(?dau:abc)', 0 => [:group, :options, '(?dau:', 0, 6]
44
+ include_examples 'scan', '(?d:)', 0 => [:group, :options, '(?d:', 0, 4]
45
+ include_examples 'scan', '(?a:)', 0 => [:group, :options, '(?a:', 0, 4]
46
+ include_examples 'scan', '(?u:)', 0 => [:group, :options, '(?u:', 0, 4]
47
+ include_examples 'scan', '(?dau)', 0 => [:group, :options_switch, '(?dau', 0, 5]
48
+
49
+ if ruby_version_at_least('2.4.1')
50
+ include_examples 'scan', '(?~abc)', 0 => [:group, :absence, '(?~', 0, 3]
51
+ end
52
+ end
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Keep scanning') do
4
+ include_examples 'scan', /ab\Kcd/,
5
+ 1 => [:keep, :mark, '\K', 2, 4]
6
+
7
+ include_examples 'scan', /(a\Kb)|(c\\\Kd)ef/,
8
+ 2 => [:keep, :mark, '\K', 2, 4],
9
+ 9 => [:keep, :mark, '\K', 11, 13]
10
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('UTF8 scanning') do
4
+ # ascii, single byte characters
5
+ include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
6
+
7
+ include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
+ include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
+
10
+ # 2 byte wide characters, Arabic
11
+ include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
+
13
+ include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
+ include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
+
16
+ include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
+ include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
+ include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
+ include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
+
21
+ include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
+ include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
+ include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
+ include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
+ include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
+ include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
+ include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
28
+
29
+ # 3 byte wide characters, Japanese
30
+ include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
+ include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
+ include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
+ include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
+ include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
35
+
36
+ # 4 byte wide characters, Osmanya
37
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
+
43
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
49
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Meta scanning') do
4
+ include_examples 'scan', /abc??|def*+|ghi+/,
5
+ 0 => [:literal, :literal, 'abc', 0, 3],
6
+ 1 => [:quantifier, :zero_or_one_reluctant, '??', 3, 5],
7
+ 2 => [:meta, :alternation, '|', 5, 6],
8
+ 3 => [:literal, :literal, 'def', 6, 9],
9
+ 4 => [:quantifier, :zero_or_more_possessive, '*+', 9, 11],
10
+ 5 => [:meta, :alternation, '|', 11, 12]
11
+
12
+ include_examples 'scan', /(a\|b)|(c|d)\|(e[|]f)/,
13
+ 2 => [:escape, :alternation, '\|', 2, 4],
14
+ 5 => [:meta, :alternation, '|', 6, 7],
15
+ 8 => [:meta, :alternation, '|', 9, 10],
16
+ 11 => [:escape, :alternation, '\|', 12, 14],
17
+ 15 => [:literal, :literal, '|', 17, 18]
18
+ end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Property scanning') do
4
+ RSpec.shared_examples 'scan property' do |text, token|
5
+ it("scans \\p{#{text}} as property #{token}") do
6
+ result = RS.scan("\\p{#{text}}")[0]
7
+ expect(result[0..1]).to eq [:property, token]
8
+ end
9
+
10
+ it("scans \\P{#{text}} as nonproperty #{token}") do
11
+ result = RS.scan("\\P{#{text}}")[0]
12
+ expect(result[0..1]).to eq [:nonproperty, token]
13
+ end
14
+
15
+ it("scans \\p{^#{text}} as nonproperty #{token}") do
16
+ result = RS.scan("\\p{^#{text}}")[0]
17
+ expect(result[0..1]).to eq [:nonproperty, token]
18
+ end
19
+
20
+ it("scans double-negated \\P{^#{text}} as property #{token}") do
21
+ result = RS.scan("\\P{^#{text}}")[0]
22
+ expect(result[0..1]).to eq [:property, token]
23
+ end
24
+ end
25
+
26
+ include_examples 'scan property', 'Alnum', :alnum
27
+
28
+ include_examples 'scan property', 'XPosixPunct', :xposixpunct
29
+
30
+ include_examples 'scan property', 'Newline', :newline
31
+
32
+ include_examples 'scan property', 'Any', :any
33
+
34
+ include_examples 'scan property', 'Assigned', :assigned
35
+
36
+ include_examples 'scan property', 'Age=1.1', :'age=1.1'
37
+ include_examples 'scan property', 'Age=10.0', :'age=10.0'
38
+
39
+ include_examples 'scan property', 'ahex', :ascii_hex_digit
40
+ include_examples 'scan property', 'ASCII_Hex_Digit', :ascii_hex_digit # test underscore
41
+
42
+ include_examples 'scan property', 'sd', :soft_dotted
43
+ include_examples 'scan property', 'Soft-Dotted', :soft_dotted # test dash
44
+
45
+ include_examples 'scan property', 'Egyp', :egyptian_hieroglyphs
46
+ include_examples 'scan property', 'Egyptian Hieroglyphs', :egyptian_hieroglyphs # test whitespace
47
+
48
+ include_examples 'scan property', 'Linb', :linear_b
49
+ include_examples 'scan property', 'Linear-B', :linear_b # test dash
50
+
51
+ include_examples 'scan property', 'InArabic', :in_arabic # test block
52
+ include_examples 'scan property', 'in Arabic', :in_arabic # test block w. whitespace
53
+ include_examples 'scan property', 'In_Arabic', :in_arabic # test block w. underscore
54
+
55
+ include_examples 'scan property', 'Yiii', :yi
56
+ include_examples 'scan property', 'Yi', :yi
57
+
58
+ include_examples 'scan property', 'Zinh', :inherited
59
+ include_examples 'scan property', 'Inherited', :inherited
60
+ include_examples 'scan property', 'Qaai', :inherited
61
+
62
+ include_examples 'scan property', 'Zzzz', :unknown
63
+ include_examples 'scan property', 'Unknown', :unknown
64
+ end