regexp_parser 1.3.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -1
  3. data/Gemfile +3 -3
  4. data/README.md +12 -19
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +34 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +12 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +72 -21
  31. data/lib/regexp_parser/syntax/versions/2.6.0.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  33. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  34. data/lib/regexp_parser/version.rb +1 -1
  35. data/regexp_parser.gemspec +3 -3
  36. data/spec/expression/base_spec.rb +94 -0
  37. data/spec/expression/clone_spec.rb +120 -0
  38. data/spec/expression/conditional_spec.rb +89 -0
  39. data/spec/expression/free_space_spec.rb +27 -0
  40. data/spec/expression/methods/match_length_spec.rb +161 -0
  41. data/spec/expression/methods/match_spec.rb +25 -0
  42. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  43. data/spec/expression/methods/tests_spec.rb +99 -0
  44. data/spec/expression/methods/traverse_spec.rb +161 -0
  45. data/spec/expression/options_spec.rb +128 -0
  46. data/spec/expression/root_spec.rb +9 -0
  47. data/spec/expression/sequence_spec.rb +9 -0
  48. data/spec/expression/subexpression_spec.rb +50 -0
  49. data/spec/expression/to_h_spec.rb +26 -0
  50. data/spec/expression/to_s_spec.rb +100 -0
  51. data/spec/lexer/all_spec.rb +22 -0
  52. data/spec/lexer/conditionals_spec.rb +53 -0
  53. data/spec/lexer/delimiters_spec.rb +68 -0
  54. data/spec/lexer/escapes_spec.rb +14 -0
  55. data/spec/lexer/keep_spec.rb +10 -0
  56. data/spec/lexer/literals_spec.rb +89 -0
  57. data/spec/lexer/nesting_spec.rb +99 -0
  58. data/spec/lexer/refcalls_spec.rb +55 -0
  59. data/spec/parser/all_spec.rb +43 -0
  60. data/spec/parser/alternation_spec.rb +88 -0
  61. data/spec/parser/anchors_spec.rb +17 -0
  62. data/spec/parser/conditionals_spec.rb +179 -0
  63. data/spec/parser/errors_spec.rb +30 -0
  64. data/spec/parser/escapes_spec.rb +121 -0
  65. data/spec/parser/free_space_spec.rb +130 -0
  66. data/spec/parser/groups_spec.rb +108 -0
  67. data/spec/parser/keep_spec.rb +6 -0
  68. data/spec/parser/posix_classes_spec.rb +8 -0
  69. data/spec/parser/properties_spec.rb +115 -0
  70. data/spec/parser/quantifiers_spec.rb +52 -0
  71. data/spec/parser/refcalls_spec.rb +112 -0
  72. data/spec/parser/set/intersections_spec.rb +127 -0
  73. data/spec/parser/set/ranges_spec.rb +111 -0
  74. data/spec/parser/sets_spec.rb +178 -0
  75. data/spec/parser/types_spec.rb +18 -0
  76. data/spec/scanner/all_spec.rb +18 -0
  77. data/spec/scanner/anchors_spec.rb +21 -0
  78. data/spec/scanner/conditionals_spec.rb +128 -0
  79. data/spec/scanner/delimiters_spec.rb +52 -0
  80. data/spec/scanner/errors_spec.rb +67 -0
  81. data/spec/scanner/escapes_spec.rb +53 -0
  82. data/spec/scanner/free_space_spec.rb +133 -0
  83. data/spec/scanner/groups_spec.rb +52 -0
  84. data/spec/scanner/keep_spec.rb +10 -0
  85. data/spec/scanner/literals_spec.rb +49 -0
  86. data/spec/scanner/meta_spec.rb +18 -0
  87. data/spec/scanner/properties_spec.rb +64 -0
  88. data/spec/scanner/quantifiers_spec.rb +20 -0
  89. data/spec/scanner/refcalls_spec.rb +36 -0
  90. data/spec/scanner/sets_spec.rb +102 -0
  91. data/spec/scanner/types_spec.rb +14 -0
  92. data/spec/spec_helper.rb +15 -0
  93. data/{test → spec}/support/runner.rb +9 -8
  94. data/spec/support/shared_examples.rb +77 -0
  95. data/{test → spec}/support/warning_extractor.rb +5 -7
  96. data/spec/syntax/syntax_spec.rb +48 -0
  97. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  98. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  99. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  100. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  101. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  102. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  103. data/spec/syntax/versions/aliases_spec.rb +37 -0
  104. data/spec/token/token_spec.rb +85 -0
  105. metadata +151 -146
  106. data/test/expression/test_all.rb +0 -12
  107. data/test/expression/test_base.rb +0 -90
  108. data/test/expression/test_clone.rb +0 -89
  109. data/test/expression/test_conditionals.rb +0 -113
  110. data/test/expression/test_free_space.rb +0 -35
  111. data/test/expression/test_set.rb +0 -84
  112. data/test/expression/test_strfregexp.rb +0 -230
  113. data/test/expression/test_subexpression.rb +0 -58
  114. data/test/expression/test_tests.rb +0 -99
  115. data/test/expression/test_to_h.rb +0 -59
  116. data/test/expression/test_to_s.rb +0 -104
  117. data/test/expression/test_traverse.rb +0 -161
  118. data/test/helpers.rb +0 -10
  119. data/test/lexer/test_all.rb +0 -41
  120. data/test/lexer/test_conditionals.rb +0 -127
  121. data/test/lexer/test_keep.rb +0 -24
  122. data/test/lexer/test_literals.rb +0 -130
  123. data/test/lexer/test_nesting.rb +0 -132
  124. data/test/lexer/test_refcalls.rb +0 -56
  125. data/test/parser/set/test_intersections.rb +0 -127
  126. data/test/parser/set/test_ranges.rb +0 -111
  127. data/test/parser/test_all.rb +0 -64
  128. data/test/parser/test_alternation.rb +0 -92
  129. data/test/parser/test_anchors.rb +0 -34
  130. data/test/parser/test_conditionals.rb +0 -187
  131. data/test/parser/test_errors.rb +0 -63
  132. data/test/parser/test_escapes.rb +0 -134
  133. data/test/parser/test_free_space.rb +0 -139
  134. data/test/parser/test_groups.rb +0 -289
  135. data/test/parser/test_keep.rb +0 -21
  136. data/test/parser/test_posix_classes.rb +0 -27
  137. data/test/parser/test_properties.rb +0 -133
  138. data/test/parser/test_quantifiers.rb +0 -301
  139. data/test/parser/test_refcalls.rb +0 -186
  140. data/test/parser/test_sets.rb +0 -179
  141. data/test/parser/test_types.rb +0 -50
  142. data/test/scanner/test_all.rb +0 -38
  143. data/test/scanner/test_anchors.rb +0 -38
  144. data/test/scanner/test_conditionals.rb +0 -184
  145. data/test/scanner/test_errors.rb +0 -91
  146. data/test/scanner/test_escapes.rb +0 -56
  147. data/test/scanner/test_free_space.rb +0 -200
  148. data/test/scanner/test_groups.rb +0 -79
  149. data/test/scanner/test_keep.rb +0 -35
  150. data/test/scanner/test_literals.rb +0 -89
  151. data/test/scanner/test_meta.rb +0 -40
  152. data/test/scanner/test_properties.rb +0 -312
  153. data/test/scanner/test_quantifiers.rb +0 -37
  154. data/test/scanner/test_refcalls.rb +0 -52
  155. data/test/scanner/test_scripts.rb +0 -53
  156. data/test/scanner/test_sets.rb +0 -119
  157. data/test/scanner/test_types.rb +0 -35
  158. data/test/scanner/test_unicode_blocks.rb +0 -30
  159. data/test/support/disable_autotest.rb +0 -8
  160. data/test/syntax/test_all.rb +0 -6
  161. data/test/syntax/test_syntax.rb +0 -61
  162. data/test/syntax/test_syntax_token_map.rb +0 -25
  163. data/test/syntax/versions/test_1.8.rb +0 -55
  164. data/test/syntax/versions/test_1.9.1.rb +0 -36
  165. data/test/syntax/versions/test_1.9.3.rb +0 -32
  166. data/test/syntax/versions/test_2.0.0.rb +0 -37
  167. data/test/syntax/versions/test_2.2.0.rb +0 -32
  168. data/test/syntax/versions/test_aliases.rb +0 -129
  169. data/test/syntax/versions/test_all.rb +0 -5
  170. data/test/test_all.rb +0 -5
  171. data/test/token/test_all.rb +0 -2
  172. data/test/token/test_token.rb +0 -107
@@ -0,0 +1,52 @@ data/spec/scanner/delimiters_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end
@@ -0,0 +1,67 @@ data/spec/scanner/errors_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Scanner) do
4
+ RSpec.shared_examples 'scan error' do |error, issue, source|
5
+ it "raises #{error} for #{issue} `#{source}`" do
6
+ expect { RS.scan(source) }.to raise_error(error)
7
+ end
8
+ end
9
+
10
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
+ include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
14
+ include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
15
+ include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
16
+ include_examples 'scan error', RS::PrematureEndError, 'eof escape', '\\'
17
+ include_examples 'scan error', RS::PrematureEndError, 'eof in hex escape', '\x'
18
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u'
19
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u0'
20
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u00'
21
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u000'
22
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{'
23
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{00'
24
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000'
25
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 '
26
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 0000'
27
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c'
28
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M'
29
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M-'
30
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C'
31
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-'
32
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M'
33
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M-'
34
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M'
35
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-'
36
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\\'
37
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\c'
38
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C'
39
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C-'
40
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ'
41
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ0'
42
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\cü'
43
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\c\M-ü'
44
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-ü'
45
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-\M-ü'
46
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-ü'
47
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\cü'
48
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\C-ü'
49
+ include_examples 'scan error', RS::ScannerError, 'invalid c-seq', '\Ca'
50
+ include_examples 'scan error', RS::ScannerError, 'invalid m-seq', '\Ma'
51
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?'')"
52
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?''empty-name)"
53
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>)'
54
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>empty-name)'
55
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?foo)'
56
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix abc)'
57
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix^bc'
58
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?)'
59
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
60
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
61
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
62
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
63
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
64
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
65
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
66
+ include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
67
+ end
@@ -0,0 +1,53 @@ data/spec/scanner/escapes_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Escape scanning') do
4
+ include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
+
6
+ # not an escape outside a character set
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
+
9
+ include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
+ include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
11
+ include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
+ include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
+
14
+ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
15
+
16
+ include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
+ include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
+ include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
+
20
+ include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
+ include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
+
23
+ include_examples 'scan', 'a\u0640c', 1 => [:escape, :codepoint, '\u0640', 1, 7]
24
+ include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
25
+ include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
26
+
27
+ include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
28
+ include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
29
+ include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
30
+ include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
31
+ include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
32
+ include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
33
+ include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
34
+ include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
35
+ include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
36
+ include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
37
+
38
+ include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
39
+ include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
40
+ include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
41
+ include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
42
+ include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
43
+ include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
44
+ include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
45
+
46
+ include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
47
+ include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
48
+ include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
49
+
50
+ include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
51
+ include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
52
+ include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
53
+ end
@@ -0,0 +1,133 @@ data/spec/scanner/free_space_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('FreeSpace scanning') do
4
+ describe('scan free space tokens') do
5
+ let(:tokens) { RS.scan(/
6
+ a
7
+ b ? c *
8
+ d {2,3}
9
+ e + | f +
10
+ /x) }
11
+
12
+ 0.upto(24).select(&:even?).each do |i|
13
+ it "scans #{i} as free space" do
14
+ expect(tokens[i][0]).to eq :free_space
15
+ expect(tokens[i][1]).to eq :whitespace
16
+ end
17
+ end
18
+ 0.upto(24).reject(&:even?).each do |i|
19
+ it "does not scan #{i} as free space" do
20
+ expect(tokens[i][0]).not_to eq :free_space
21
+ expect(tokens[i][1]).not_to eq :whitespace
22
+ end
23
+ end
24
+
25
+ it 'sets the correct text' do
26
+ [0, 2, 10, 14].each { |i| expect(tokens[i][2]).to eq "\n " }
27
+ [4, 6, 8, 12].each { |i| expect(tokens[i][2]).to eq ' ' }
28
+ end
29
+ end
30
+
31
+ describe('scan free space comments') do
32
+ include_examples 'scan', /
33
+ a + # A + comment
34
+ b ? # B ? comment
35
+ c {2,3} # C {2,3} comment
36
+ d + | e + # D|E comment
37
+ /x,
38
+ 5 => [:free_space, :comment, "# A + comment\n", 11, 25],
39
+ 11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
+ 17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
+ 29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+ end
43
+
44
+ describe('scan free space inlined') do
45
+ include_examples 'scan', /a b(?x:c d e)f g/,
46
+ 0 => [:literal, :literal, 'a b', 0, 3],
47
+ 1 => [:group, :options, '(?x:', 3, 7],
48
+ 2 => [:literal, :literal, 'c', 7, 8],
49
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
50
+ 4 => [:literal, :literal, 'd', 9, 10],
51
+ 5 => [:free_space, :whitespace, ' ', 10, 11],
52
+ 6 => [:literal, :literal, 'e', 11, 12],
53
+ 7 => [:group, :close, ')', 12, 13],
54
+ 8 => [:literal, :literal, 'f g', 13, 16]
55
+ end
56
+
57
+ describe('scan free space nested') do
58
+ include_examples 'scan', /a b(?x:c d(?-x:e f)g h)i j/,
59
+ 0 => [:literal, :literal, 'a b', 0, 3],
60
+ 1 => [:group, :options, '(?x:', 3, 7],
61
+ 2 => [:literal, :literal, 'c', 7, 8],
62
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
63
+ 4 => [:literal, :literal, 'd', 9, 10],
64
+ 5 => [:group, :options, '(?-x:', 10, 15],
65
+ 6 => [:literal, :literal, 'e f', 15, 18],
66
+ 7 => [:group, :close, ')', 18, 19],
67
+ 8 => [:literal, :literal, 'g', 19, 20],
68
+ 9 => [:free_space, :whitespace, ' ', 20, 21],
69
+ 10 => [:literal, :literal, 'h', 21, 22],
70
+ 11 => [:group, :close, ')', 22, 23],
71
+ 12 => [:literal, :literal, 'i j', 23, 26]
72
+ end
73
+
74
+ describe('scan free space nested groups') do
75
+ include_examples 'scan', /(a (b(?x: (c d) (?-x:(e f) )g) h)i j)/,
76
+ 0 => [:group, :capture, '(', 0, 1],
77
+ 1 => [:literal, :literal, 'a ', 1, 3],
78
+ 2 => [:group, :capture, '(', 3, 4],
79
+ 3 => [:literal, :literal, 'b', 4, 5],
80
+ 4 => [:group, :options, '(?x:', 5, 9],
81
+ 5 => [:free_space, :whitespace, ' ', 9, 10],
82
+ 6 => [:group, :capture, '(', 10, 11],
83
+ 7 => [:literal, :literal, 'c', 11, 12],
84
+ 8 => [:free_space, :whitespace, ' ', 12, 13],
85
+ 9 => [:literal, :literal, 'd', 13, 14],
86
+ 10 => [:group, :close, ')', 14, 15],
87
+ 11 => [:free_space, :whitespace, ' ', 15, 16],
88
+ 12 => [:group, :options, '(?-x:', 16, 21],
89
+ 13 => [:group, :capture, '(', 21, 22],
90
+ 14 => [:literal, :literal, 'e f', 22, 25],
91
+ 15 => [:group, :close, ')', 25, 26],
92
+ 16 => [:literal, :literal, ' ', 26, 27],
93
+ 17 => [:group, :close, ')', 27, 28],
94
+ 18 => [:literal, :literal, 'g', 28, 29],
95
+ 19 => [:group, :close, ')', 29, 30],
96
+ 20 => [:literal, :literal, ' h', 30, 32],
97
+ 21 => [:group, :close, ')', 32, 33],
98
+ 22 => [:literal, :literal, 'i j', 33, 36],
99
+ 23 => [:group, :close, ')', 36, 37]
100
+ end
101
+
102
+ describe('scan free space switch groups') do
103
+ include_examples 'scan', /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/,
104
+ 0 => [:group, :capture, '(', 0, 1],
105
+ 1 => [:literal, :literal, 'a ', 1, 3],
106
+ 2 => [:group, :capture, '(', 3, 4],
107
+ 3 => [:literal, :literal, 'b', 4, 5],
108
+ 4 => [:group, :capture, '(', 5, 6],
109
+ 5 => [:group, :options_switch, '(?x', 6, 9],
110
+ 6 => [:group, :close, ')', 9, 10],
111
+ 7 => [:free_space, :whitespace, ' ', 10, 11],
112
+ 8 => [:group, :capture, '(', 11, 12],
113
+ 9 => [:literal, :literal, 'c', 12, 13],
114
+ 10 => [:free_space, :whitespace, ' ', 13, 14],
115
+ 11 => [:literal, :literal, 'd', 14, 15],
116
+ 12 => [:group, :close, ')', 15, 16],
117
+ 13 => [:free_space, :whitespace, ' ', 16, 17],
118
+ 14 => [:group, :capture, '(', 17, 18],
119
+ 15 => [:group, :options_switch, '(?-x', 18, 22],
120
+ 16 => [:group, :close, ')', 22, 23],
121
+ 17 => [:group, :capture, '(', 23, 24],
122
+ 18 => [:literal, :literal, 'e f', 24, 27],
123
+ 19 => [:group, :close, ')', 27, 28],
124
+ 20 => [:literal, :literal, ' ', 28, 29],
125
+ 21 => [:group, :close, ')', 29, 30],
126
+ 22 => [:literal, :literal, 'g', 30, 31],
127
+ 23 => [:group, :close, ')', 31, 32],
128
+ 24 => [:literal, :literal, ' h', 32, 34],
129
+ 25 => [:group, :close, ')', 34, 35],
130
+ 26 => [:literal, :literal, 'i j', 35, 38],
131
+ 27 => [:group, :close, ')', 38, 39]
132
+ end
133
+ end
@@ -0,0 +1,52 @@ data/spec/scanner/groups_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Group scanning') do
4
+ # Group types
5
+ include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
+ include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
+
8
+ include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
+ include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
+
11
+ include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
+ include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
13
+
14
+ include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
+ include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
16
+ include_examples 'scan', '(?::)', 0 => [:group, :passive, '(?:', 0, 3]
17
+
18
+ # Comments
19
+ include_examples 'scan', '(?#abc)', 0 => [:group, :comment, '(?#abc)', 0, 7]
20
+ include_examples 'scan', '(?#)', 0 => [:group, :comment, '(?#)', 0, 4]
21
+
22
+ # Assertions
23
+ include_examples 'scan', '(?=abc)', 0 => [:assertion, :lookahead, '(?=', 0, 3]
24
+ include_examples 'scan', '(?!abc)', 0 => [:assertion, :nlookahead, '(?!', 0, 3]
25
+ include_examples 'scan', '(?<=abc)', 0 => [:assertion, :lookbehind, '(?<=', 0, 4]
26
+ include_examples 'scan', '(?<!abc)', 0 => [:assertion, :nlookbehind, '(?<!', 0, 4]
27
+
28
+ # Options
29
+ include_examples 'scan', '(?-mix:abc)', 0 => [:group, :options, '(?-mix:', 0, 7]
30
+ include_examples 'scan', '(?m-ix:abc)', 0 => [:group, :options, '(?m-ix:', 0, 7]
31
+ include_examples 'scan', '(?mi-x:abc)', 0 => [:group, :options, '(?mi-x:', 0, 7]
32
+ include_examples 'scan', '(?mix:abc)', 0 => [:group, :options, '(?mix:', 0, 6]
33
+ include_examples 'scan', '(?m:)', 0 => [:group, :options, '(?m:', 0, 4]
34
+ include_examples 'scan', '(?i:)', 0 => [:group, :options, '(?i:', 0, 4]
35
+ include_examples 'scan', '(?x:)', 0 => [:group, :options, '(?x:', 0, 4]
36
+ include_examples 'scan', '(?mix)', 0 => [:group, :options_switch, '(?mix', 0, 5]
37
+ include_examples 'scan', '(?d-mix:abc)', 0 => [:group, :options, '(?d-mix:', 0, 8]
38
+ include_examples 'scan', '(?a-mix:abc)', 0 => [:group, :options, '(?a-mix:', 0, 8]
39
+ include_examples 'scan', '(?u-mix:abc)', 0 => [:group, :options, '(?u-mix:', 0, 8]
40
+ include_examples 'scan', '(?da-m:abc)', 0 => [:group, :options, '(?da-m:', 0, 7]
41
+ include_examples 'scan', '(?du-x:abc)', 0 => [:group, :options, '(?du-x:', 0, 7]
42
+ include_examples 'scan', '(?dau-i:abc)', 0 => [:group, :options, '(?dau-i:', 0, 8]
43
+ include_examples 'scan', '(?dau:abc)', 0 => [:group, :options, '(?dau:', 0, 6]
44
+ include_examples 'scan', '(?d:)', 0 => [:group, :options, '(?d:', 0, 4]
45
+ include_examples 'scan', '(?a:)', 0 => [:group, :options, '(?a:', 0, 4]
46
+ include_examples 'scan', '(?u:)', 0 => [:group, :options, '(?u:', 0, 4]
47
+ include_examples 'scan', '(?dau)', 0 => [:group, :options_switch, '(?dau', 0, 5]
48
+
49
+ if ruby_version_at_least('2.4.1')
50
+ include_examples 'scan', '(?~abc)', 0 => [:group, :absence, '(?~', 0, 3]
51
+ end
52
+ end
@@ -0,0 +1,10 @@ data/spec/scanner/keep_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Keep scanning') do
4
+ include_examples 'scan', /ab\Kcd/,
5
+ 1 => [:keep, :mark, '\K', 2, 4]
6
+
7
+ include_examples 'scan', /(a\Kb)|(c\\\Kd)ef/,
8
+ 2 => [:keep, :mark, '\K', 2, 4],
9
+ 9 => [:keep, :mark, '\K', 11, 13]
10
+ end
@@ -0,0 +1,49 @@ data/spec/scanner/literals_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('UTF8 scanning') do
4
+ # ascii, single byte characters
5
+ include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
6
+
7
+ include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
+ include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
+
10
+ # 2 byte wide characters, Arabic
11
+ include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
+
13
+ include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
+ include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
+
16
+ include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
+ include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
+ include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
+ include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
+
21
+ include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
+ include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
+ include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
+ include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
+ include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
+ include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
+ include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
28
+
29
+ # 3 byte wide characters, Japanese
30
+ include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
+ include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
+ include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
+ include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
+ include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
35
+
36
+ # 4 byte wide characters, Osmanya
37
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
+
43
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
49
+ end
@@ -0,0 +1,18 @@ data/spec/scanner/meta_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Meta scanning') do
4
+ include_examples 'scan', /abc??|def*+|ghi+/,
5
+ 0 => [:literal, :literal, 'abc', 0, 3],
6
+ 1 => [:quantifier, :zero_or_one_reluctant, '??', 3, 5],
7
+ 2 => [:meta, :alternation, '|', 5, 6],
8
+ 3 => [:literal, :literal, 'def', 6, 9],
9
+ 4 => [:quantifier, :zero_or_more_possessive, '*+', 9, 11],
10
+ 5 => [:meta, :alternation, '|', 11, 12]
11
+
12
+ include_examples 'scan', /(a\|b)|(c|d)\|(e[|]f)/,
13
+ 2 => [:escape, :alternation, '\|', 2, 4],
14
+ 5 => [:meta, :alternation, '|', 6, 7],
15
+ 8 => [:meta, :alternation, '|', 9, 10],
16
+ 11 => [:escape, :alternation, '\|', 12, 14],
17
+ 15 => [:literal, :literal, '|', 17, 18]
18
+ end
@@ -0,0 +1,64 @@ data/spec/scanner/properties_spec.rb
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Property scanning') do
4
+ RSpec.shared_examples 'scan property' do |text, token|
5
+ it("scans \\p{#{text}} as property #{token}") do
6
+ result = RS.scan("\\p{#{text}}")[0]
7
+ expect(result[0..1]).to eq [:property, token]
8
+ end
9
+
10
+ it("scans \\P{#{text}} as nonproperty #{token}") do
11
+ result = RS.scan("\\P{#{text}}")[0]
12
+ expect(result[0..1]).to eq [:nonproperty, token]
13
+ end
14
+
15
+ it("scans \\p{^#{text}} as nonproperty #{token}") do
16
+ result = RS.scan("\\p{^#{text}}")[0]
17
+ expect(result[0..1]).to eq [:nonproperty, token]
18
+ end
19
+
20
+ it("scans double-negated \\P{^#{text}} as property #{token}") do
21
+ result = RS.scan("\\P{^#{text}}")[0]
22
+ expect(result[0..1]).to eq [:property, token]
23
+ end
24
+ end
25
+
26
+ include_examples 'scan property', 'Alnum', :alnum
27
+
28
+ include_examples 'scan property', 'XPosixPunct', :xposixpunct
29
+
30
+ include_examples 'scan property', 'Newline', :newline
31
+
32
+ include_examples 'scan property', 'Any', :any
33
+
34
+ include_examples 'scan property', 'Assigned', :assigned
35
+
36
+ include_examples 'scan property', 'Age=1.1', :'age=1.1'
37
+ include_examples 'scan property', 'Age=10.0', :'age=10.0'
38
+
39
+ include_examples 'scan property', 'ahex', :ascii_hex_digit
40
+ include_examples 'scan property', 'ASCII_Hex_Digit', :ascii_hex_digit # test underscore
41
+
42
+ include_examples 'scan property', 'sd', :soft_dotted
43
+ include_examples 'scan property', 'Soft-Dotted', :soft_dotted # test dash
44
+
45
+ include_examples 'scan property', 'Egyp', :egyptian_hieroglyphs
46
+ include_examples 'scan property', 'Egyptian Hieroglyphs', :egyptian_hieroglyphs # test whitespace
47
+
48
+ include_examples 'scan property', 'Linb', :linear_b
49
+ include_examples 'scan property', 'Linear-B', :linear_b # test dash
50
+
51
+ include_examples 'scan property', 'InArabic', :in_arabic # test block
52
+ include_examples 'scan property', 'in Arabic', :in_arabic # test block w. whitespace
53
+ include_examples 'scan property', 'In_Arabic', :in_arabic # test block w. underscore
54
+
55
+ include_examples 'scan property', 'Yiii', :yi
56
+ include_examples 'scan property', 'Yi', :yi
57
+
58
+ include_examples 'scan property', 'Zinh', :inherited
59
+ include_examples 'scan property', 'Inherited', :inherited
60
+ include_examples 'scan property', 'Qaai', :inherited
61
+
62
+ include_examples 'scan property', 'Zzzz', :unknown
63
+ include_examples 'scan property', 'Unknown', :unknown
64
+ end