regexp_parser 1.4.0 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -1
  3. data/Gemfile +3 -3
  4. data/README.md +11 -18
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +2 -2
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +161 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +161 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/delimiters_spec.rb +68 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +52 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/delimiters_spec.rb +52 -0
  79. data/spec/scanner/errors_spec.rb +67 -0
  80. data/spec/scanner/escapes_spec.rb +53 -0
  81. data/spec/scanner/free_space_spec.rb +133 -0
  82. data/spec/scanner/groups_spec.rb +52 -0
  83. data/spec/scanner/keep_spec.rb +10 -0
  84. data/spec/scanner/literals_spec.rb +49 -0
  85. data/spec/scanner/meta_spec.rb +18 -0
  86. data/spec/scanner/properties_spec.rb +64 -0
  87. data/spec/scanner/quantifiers_spec.rb +20 -0
  88. data/spec/scanner/refcalls_spec.rb +36 -0
  89. data/spec/scanner/sets_spec.rb +102 -0
  90. data/spec/scanner/types_spec.rb +14 -0
  91. data/spec/spec_helper.rb +15 -0
  92. data/{test → spec}/support/runner.rb +9 -8
  93. data/spec/support/shared_examples.rb +77 -0
  94. data/{test → spec}/support/warning_extractor.rb +5 -7
  95. data/spec/syntax/syntax_spec.rb +48 -0
  96. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  97. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  98. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  99. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  100. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  101. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  102. data/spec/syntax/versions/aliases_spec.rb +37 -0
  103. data/spec/token/token_spec.rb +85 -0
  104. metadata +149 -144
  105. data/test/expression/test_all.rb +0 -12
  106. data/test/expression/test_base.rb +0 -90
  107. data/test/expression/test_clone.rb +0 -89
  108. data/test/expression/test_conditionals.rb +0 -113
  109. data/test/expression/test_free_space.rb +0 -35
  110. data/test/expression/test_set.rb +0 -84
  111. data/test/expression/test_strfregexp.rb +0 -230
  112. data/test/expression/test_subexpression.rb +0 -58
  113. data/test/expression/test_tests.rb +0 -99
  114. data/test/expression/test_to_h.rb +0 -59
  115. data/test/expression/test_to_s.rb +0 -104
  116. data/test/expression/test_traverse.rb +0 -161
  117. data/test/helpers.rb +0 -10
  118. data/test/lexer/test_all.rb +0 -41
  119. data/test/lexer/test_conditionals.rb +0 -127
  120. data/test/lexer/test_keep.rb +0 -24
  121. data/test/lexer/test_literals.rb +0 -130
  122. data/test/lexer/test_nesting.rb +0 -132
  123. data/test/lexer/test_refcalls.rb +0 -56
  124. data/test/parser/set/test_intersections.rb +0 -127
  125. data/test/parser/set/test_ranges.rb +0 -111
  126. data/test/parser/test_all.rb +0 -64
  127. data/test/parser/test_alternation.rb +0 -92
  128. data/test/parser/test_anchors.rb +0 -34
  129. data/test/parser/test_conditionals.rb +0 -187
  130. data/test/parser/test_errors.rb +0 -63
  131. data/test/parser/test_escapes.rb +0 -134
  132. data/test/parser/test_free_space.rb +0 -139
  133. data/test/parser/test_groups.rb +0 -289
  134. data/test/parser/test_keep.rb +0 -21
  135. data/test/parser/test_posix_classes.rb +0 -27
  136. data/test/parser/test_properties.rb +0 -134
  137. data/test/parser/test_quantifiers.rb +0 -301
  138. data/test/parser/test_refcalls.rb +0 -186
  139. data/test/parser/test_sets.rb +0 -179
  140. data/test/parser/test_types.rb +0 -50
  141. data/test/scanner/test_all.rb +0 -38
  142. data/test/scanner/test_anchors.rb +0 -38
  143. data/test/scanner/test_conditionals.rb +0 -184
  144. data/test/scanner/test_errors.rb +0 -91
  145. data/test/scanner/test_escapes.rb +0 -56
  146. data/test/scanner/test_free_space.rb +0 -200
  147. data/test/scanner/test_groups.rb +0 -79
  148. data/test/scanner/test_keep.rb +0 -35
  149. data/test/scanner/test_literals.rb +0 -89
  150. data/test/scanner/test_meta.rb +0 -40
  151. data/test/scanner/test_properties.rb +0 -312
  152. data/test/scanner/test_quantifiers.rb +0 -37
  153. data/test/scanner/test_refcalls.rb +0 -52
  154. data/test/scanner/test_scripts.rb +0 -53
  155. data/test/scanner/test_sets.rb +0 -119
  156. data/test/scanner/test_types.rb +0 -35
  157. data/test/scanner/test_unicode_blocks.rb +0 -30
  158. data/test/support/disable_autotest.rb +0 -8
  159. data/test/syntax/test_all.rb +0 -6
  160. data/test/syntax/test_syntax.rb +0 -61
  161. data/test/syntax/test_syntax_token_map.rb +0 -25
  162. data/test/syntax/versions/test_1.8.rb +0 -55
  163. data/test/syntax/versions/test_1.9.1.rb +0 -36
  164. data/test/syntax/versions/test_1.9.3.rb +0 -32
  165. data/test/syntax/versions/test_2.0.0.rb +0 -37
  166. data/test/syntax/versions/test_2.2.0.rb +0 -32
  167. data/test/syntax/versions/test_aliases.rb +0 -129
  168. data/test/syntax/versions/test_all.rb +0 -5
  169. data/test/test_all.rb +0 -5
  170. data/test/token/test_all.rb +0 -2
  171. data/test/token/test_token.rb +0 -107
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end
@@ -0,0 +1,67 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Scanner) do
4
+ RSpec.shared_examples 'scan error' do |error, issue, source|
5
+ it "raises #{error} for #{issue} `#{source}`" do
6
+ expect { RS.scan(source) }.to raise_error(error)
7
+ end
8
+ end
9
+
10
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
+ include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
14
+ include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
15
+ include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
16
+ include_examples 'scan error', RS::PrematureEndError, 'eof escape', '\\'
17
+ include_examples 'scan error', RS::PrematureEndError, 'eof in hex escape', '\x'
18
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u'
19
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u0'
20
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u00'
21
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u000'
22
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{'
23
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{00'
24
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000'
25
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 '
26
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 0000'
27
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c'
28
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M'
29
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M-'
30
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C'
31
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-'
32
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M'
33
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M-'
34
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M'
35
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-'
36
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\\'
37
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\c'
38
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C'
39
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C-'
40
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ'
41
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ0'
42
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\cü'
43
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\c\M-ü'
44
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-ü'
45
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-\M-ü'
46
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-ü'
47
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\cü'
48
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\C-ü'
49
+ include_examples 'scan error', RS::ScannerError, 'invalid c-seq', '\Ca'
50
+ include_examples 'scan error', RS::ScannerError, 'invalid m-seq', '\Ma'
51
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?'')"
52
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?''empty-name)"
53
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>)'
54
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>empty-name)'
55
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?foo)'
56
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix abc)'
57
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix^bc'
58
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?)'
59
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
60
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
61
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
62
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
63
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
64
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
65
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
66
+ include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
67
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Escape scanning') do
4
+ include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
+
6
+ # not an escape outside a character set
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
+
9
+ include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
+ include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
11
+ include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
+ include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
+
14
+ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
15
+
16
+ include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
+ include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
+ include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
+
20
+ include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
+ include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
+
23
+ include_examples 'scan', 'a\u0640c', 1 => [:escape, :codepoint, '\u0640', 1, 7]
24
+ include_examples 'scan', 'a\u{640 0641}c', 1 => [:escape, :codepoint_list, '\u{640 0641}', 1, 13]
25
+ include_examples 'scan', 'a\u{10FFFF}c', 1 => [:escape, :codepoint_list, '\u{10FFFF}', 1, 11]
26
+
27
+ include_examples 'scan', /a\cBc/, 1 => [:escape, :control, '\cB', 1, 4]
28
+ include_examples 'scan', /a\c^c/, 1 => [:escape, :control, '\c^', 1, 4]
29
+ include_examples 'scan', /a\c\n/, 1 => [:escape, :control, '\c\n', 1, 5]
30
+ include_examples 'scan', /a\c\\b/, 1 => [:escape, :control, '\c\\\\', 1, 5]
31
+ include_examples 'scan', /a\C-bc/, 1 => [:escape, :control, '\C-b', 1, 5]
32
+ include_examples 'scan', /a\C-^b/, 1 => [:escape, :control, '\C-^', 1, 5]
33
+ include_examples 'scan', /a\C-\nb/, 1 => [:escape, :control, '\C-\n', 1, 6]
34
+ include_examples 'scan', /a\C-\\b/, 1 => [:escape, :control, '\C-\\\\', 1, 6]
35
+ include_examples 'scan', /a\c\M-Bc/n, 1 => [:escape, :control, '\c\M-B', 1, 7]
36
+ include_examples 'scan', /a\C-\M-Bc/n, 1 => [:escape, :control, '\C-\M-B', 1, 8]
37
+
38
+ include_examples 'scan', /a\M-Bc/n, 1 => [:escape, :meta_sequence, '\M-B', 1, 5]
39
+ include_examples 'scan', /a\M-\cBc/n, 1 => [:escape, :meta_sequence, '\M-\cB', 1, 7]
40
+ include_examples 'scan', /a\M-\c^/n, 1 => [:escape, :meta_sequence, '\M-\c^', 1, 7]
41
+ include_examples 'scan', /a\M-\c\n/n, 1 => [:escape, :meta_sequence, '\M-\c\n', 1, 8]
42
+ include_examples 'scan', /a\M-\c\\/n, 1 => [:escape, :meta_sequence, '\M-\c\\\\', 1, 8]
43
+ include_examples 'scan', /a\M-\C-Bc/n, 1 => [:escape, :meta_sequence, '\M-\C-B', 1, 8]
44
+ include_examples 'scan', /a\M-\C-\\/n, 1 => [:escape, :meta_sequence, '\M-\C-\\\\', 1, 9]
45
+
46
+ include_examples 'scan', 'ab\\\xcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
47
+ include_examples 'scan', 'ab\\\0cd', 1 => [:escape, :backslash, '\\\\', 2, 4]
48
+ include_examples 'scan', 'ab\\\Kcd', 1 => [:escape, :backslash, '\\\\', 2, 4]
49
+
50
+ include_examples 'scan', 'ab\^cd', 1 => [:escape, :bol, '\^', 2, 4]
51
+ include_examples 'scan', 'ab\$cd', 1 => [:escape, :eol, '\$', 2, 4]
52
+ include_examples 'scan', 'ab\[cd', 1 => [:escape, :set_open, '\[', 2, 4]
53
+ end
@@ -0,0 +1,133 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('FreeSpace scanning') do
4
+ describe('scan free space tokens') do
5
+ let(:tokens) { RS.scan(/
6
+ a
7
+ b ? c *
8
+ d {2,3}
9
+ e + | f +
10
+ /x) }
11
+
12
+ 0.upto(24).select(&:even?).each do |i|
13
+ it "scans #{i} as free space" do
14
+ expect(tokens[i][0]).to eq :free_space
15
+ expect(tokens[i][1]).to eq :whitespace
16
+ end
17
+ end
18
+ 0.upto(24).reject(&:even?).each do |i|
19
+ it "does not scan #{i} as free space" do
20
+ expect(tokens[i][0]).not_to eq :free_space
21
+ expect(tokens[i][1]).not_to eq :whitespace
22
+ end
23
+ end
24
+
25
+ it 'sets the correct text' do
26
+ [0, 2, 10, 14].each { |i| expect(tokens[i][2]).to eq "\n " }
27
+ [4, 6, 8, 12].each { |i| expect(tokens[i][2]).to eq ' ' }
28
+ end
29
+ end
30
+
31
+ describe('scan free space comments') do
32
+ include_examples 'scan', /
33
+ a + # A + comment
34
+ b ? # B ? comment
35
+ c {2,3} # C {2,3} comment
36
+ d + | e + # D|E comment
37
+ /x,
38
+ 5 => [:free_space, :comment, "# A + comment\n", 11, 25],
39
+ 11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
+ 17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
+ 29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+ end
43
+
44
+ describe('scan free space inlined') do
45
+ include_examples 'scan', /a b(?x:c d e)f g/,
46
+ 0 => [:literal, :literal, 'a b', 0, 3],
47
+ 1 => [:group, :options, '(?x:', 3, 7],
48
+ 2 => [:literal, :literal, 'c', 7, 8],
49
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
50
+ 4 => [:literal, :literal, 'd', 9, 10],
51
+ 5 => [:free_space, :whitespace, ' ', 10, 11],
52
+ 6 => [:literal, :literal, 'e', 11, 12],
53
+ 7 => [:group, :close, ')', 12, 13],
54
+ 8 => [:literal, :literal, 'f g', 13, 16]
55
+ end
56
+
57
+ describe('scan free space nested') do
58
+ include_examples 'scan', /a b(?x:c d(?-x:e f)g h)i j/,
59
+ 0 => [:literal, :literal, 'a b', 0, 3],
60
+ 1 => [:group, :options, '(?x:', 3, 7],
61
+ 2 => [:literal, :literal, 'c', 7, 8],
62
+ 3 => [:free_space, :whitespace, ' ', 8, 9],
63
+ 4 => [:literal, :literal, 'd', 9, 10],
64
+ 5 => [:group, :options, '(?-x:', 10, 15],
65
+ 6 => [:literal, :literal, 'e f', 15, 18],
66
+ 7 => [:group, :close, ')', 18, 19],
67
+ 8 => [:literal, :literal, 'g', 19, 20],
68
+ 9 => [:free_space, :whitespace, ' ', 20, 21],
69
+ 10 => [:literal, :literal, 'h', 21, 22],
70
+ 11 => [:group, :close, ')', 22, 23],
71
+ 12 => [:literal, :literal, 'i j', 23, 26]
72
+ end
73
+
74
+ describe('scan free space nested groups') do
75
+ include_examples 'scan', /(a (b(?x: (c d) (?-x:(e f) )g) h)i j)/,
76
+ 0 => [:group, :capture, '(', 0, 1],
77
+ 1 => [:literal, :literal, 'a ', 1, 3],
78
+ 2 => [:group, :capture, '(', 3, 4],
79
+ 3 => [:literal, :literal, 'b', 4, 5],
80
+ 4 => [:group, :options, '(?x:', 5, 9],
81
+ 5 => [:free_space, :whitespace, ' ', 9, 10],
82
+ 6 => [:group, :capture, '(', 10, 11],
83
+ 7 => [:literal, :literal, 'c', 11, 12],
84
+ 8 => [:free_space, :whitespace, ' ', 12, 13],
85
+ 9 => [:literal, :literal, 'd', 13, 14],
86
+ 10 => [:group, :close, ')', 14, 15],
87
+ 11 => [:free_space, :whitespace, ' ', 15, 16],
88
+ 12 => [:group, :options, '(?-x:', 16, 21],
89
+ 13 => [:group, :capture, '(', 21, 22],
90
+ 14 => [:literal, :literal, 'e f', 22, 25],
91
+ 15 => [:group, :close, ')', 25, 26],
92
+ 16 => [:literal, :literal, ' ', 26, 27],
93
+ 17 => [:group, :close, ')', 27, 28],
94
+ 18 => [:literal, :literal, 'g', 28, 29],
95
+ 19 => [:group, :close, ')', 29, 30],
96
+ 20 => [:literal, :literal, ' h', 30, 32],
97
+ 21 => [:group, :close, ')', 32, 33],
98
+ 22 => [:literal, :literal, 'i j', 33, 36],
99
+ 23 => [:group, :close, ')', 36, 37]
100
+ end
101
+
102
+ describe('scan free space switch groups') do
103
+ include_examples 'scan', /(a (b((?x) (c d) ((?-x)(e f) )g) h)i j)/,
104
+ 0 => [:group, :capture, '(', 0, 1],
105
+ 1 => [:literal, :literal, 'a ', 1, 3],
106
+ 2 => [:group, :capture, '(', 3, 4],
107
+ 3 => [:literal, :literal, 'b', 4, 5],
108
+ 4 => [:group, :capture, '(', 5, 6],
109
+ 5 => [:group, :options_switch, '(?x', 6, 9],
110
+ 6 => [:group, :close, ')', 9, 10],
111
+ 7 => [:free_space, :whitespace, ' ', 10, 11],
112
+ 8 => [:group, :capture, '(', 11, 12],
113
+ 9 => [:literal, :literal, 'c', 12, 13],
114
+ 10 => [:free_space, :whitespace, ' ', 13, 14],
115
+ 11 => [:literal, :literal, 'd', 14, 15],
116
+ 12 => [:group, :close, ')', 15, 16],
117
+ 13 => [:free_space, :whitespace, ' ', 16, 17],
118
+ 14 => [:group, :capture, '(', 17, 18],
119
+ 15 => [:group, :options_switch, '(?-x', 18, 22],
120
+ 16 => [:group, :close, ')', 22, 23],
121
+ 17 => [:group, :capture, '(', 23, 24],
122
+ 18 => [:literal, :literal, 'e f', 24, 27],
123
+ 19 => [:group, :close, ')', 27, 28],
124
+ 20 => [:literal, :literal, ' ', 28, 29],
125
+ 21 => [:group, :close, ')', 29, 30],
126
+ 22 => [:literal, :literal, 'g', 30, 31],
127
+ 23 => [:group, :close, ')', 31, 32],
128
+ 24 => [:literal, :literal, ' h', 32, 34],
129
+ 25 => [:group, :close, ')', 34, 35],
130
+ 26 => [:literal, :literal, 'i j', 35, 38],
131
+ 27 => [:group, :close, ')', 38, 39]
132
+ end
133
+ end
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Group scanning') do
4
+ # Group types
5
+ include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
+ include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
+
8
+ include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
+ include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
+
11
+ include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
+ include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
13
+
14
+ include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
+ include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
16
+ include_examples 'scan', '(?::)', 0 => [:group, :passive, '(?:', 0, 3]
17
+
18
+ # Comments
19
+ include_examples 'scan', '(?#abc)', 0 => [:group, :comment, '(?#abc)', 0, 7]
20
+ include_examples 'scan', '(?#)', 0 => [:group, :comment, '(?#)', 0, 4]
21
+
22
+ # Assertions
23
+ include_examples 'scan', '(?=abc)', 0 => [:assertion, :lookahead, '(?=', 0, 3]
24
+ include_examples 'scan', '(?!abc)', 0 => [:assertion, :nlookahead, '(?!', 0, 3]
25
+ include_examples 'scan', '(?<=abc)', 0 => [:assertion, :lookbehind, '(?<=', 0, 4]
26
+ include_examples 'scan', '(?<!abc)', 0 => [:assertion, :nlookbehind, '(?<!', 0, 4]
27
+
28
+ # Options
29
+ include_examples 'scan', '(?-mix:abc)', 0 => [:group, :options, '(?-mix:', 0, 7]
30
+ include_examples 'scan', '(?m-ix:abc)', 0 => [:group, :options, '(?m-ix:', 0, 7]
31
+ include_examples 'scan', '(?mi-x:abc)', 0 => [:group, :options, '(?mi-x:', 0, 7]
32
+ include_examples 'scan', '(?mix:abc)', 0 => [:group, :options, '(?mix:', 0, 6]
33
+ include_examples 'scan', '(?m:)', 0 => [:group, :options, '(?m:', 0, 4]
34
+ include_examples 'scan', '(?i:)', 0 => [:group, :options, '(?i:', 0, 4]
35
+ include_examples 'scan', '(?x:)', 0 => [:group, :options, '(?x:', 0, 4]
36
+ include_examples 'scan', '(?mix)', 0 => [:group, :options_switch, '(?mix', 0, 5]
37
+ include_examples 'scan', '(?d-mix:abc)', 0 => [:group, :options, '(?d-mix:', 0, 8]
38
+ include_examples 'scan', '(?a-mix:abc)', 0 => [:group, :options, '(?a-mix:', 0, 8]
39
+ include_examples 'scan', '(?u-mix:abc)', 0 => [:group, :options, '(?u-mix:', 0, 8]
40
+ include_examples 'scan', '(?da-m:abc)', 0 => [:group, :options, '(?da-m:', 0, 7]
41
+ include_examples 'scan', '(?du-x:abc)', 0 => [:group, :options, '(?du-x:', 0, 7]
42
+ include_examples 'scan', '(?dau-i:abc)', 0 => [:group, :options, '(?dau-i:', 0, 8]
43
+ include_examples 'scan', '(?dau:abc)', 0 => [:group, :options, '(?dau:', 0, 6]
44
+ include_examples 'scan', '(?d:)', 0 => [:group, :options, '(?d:', 0, 4]
45
+ include_examples 'scan', '(?a:)', 0 => [:group, :options, '(?a:', 0, 4]
46
+ include_examples 'scan', '(?u:)', 0 => [:group, :options, '(?u:', 0, 4]
47
+ include_examples 'scan', '(?dau)', 0 => [:group, :options_switch, '(?dau', 0, 5]
48
+
49
+ if ruby_version_at_least('2.4.1')
50
+ include_examples 'scan', '(?~abc)', 0 => [:group, :absence, '(?~', 0, 3]
51
+ end
52
+ end
@@ -0,0 +1,10 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Keep scanning') do
4
+ include_examples 'scan', /ab\Kcd/,
5
+ 1 => [:keep, :mark, '\K', 2, 4]
6
+
7
+ include_examples 'scan', /(a\Kb)|(c\\\Kd)ef/,
8
+ 2 => [:keep, :mark, '\K', 2, 4],
9
+ 9 => [:keep, :mark, '\K', 11, 13]
10
+ end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('UTF8 scanning') do
4
+ # ascii, single byte characters
5
+ include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
6
+
7
+ include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
+ include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
+
10
+ # 2 byte wide characters, Arabic
11
+ include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
+
13
+ include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
+ include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
+
16
+ include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
+ include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
+ include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
+ include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
+
21
+ include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
+ include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
+ include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
+ include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
+ include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
+ include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
+ include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
28
+
29
+ # 3 byte wide characters, Japanese
30
+ include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
+ include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
+ include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
+ include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
+ include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
35
+
36
+ # 4 byte wide characters, Osmanya
37
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
+
43
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
+ include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
49
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Meta scanning') do
4
+ include_examples 'scan', /abc??|def*+|ghi+/,
5
+ 0 => [:literal, :literal, 'abc', 0, 3],
6
+ 1 => [:quantifier, :zero_or_one_reluctant, '??', 3, 5],
7
+ 2 => [:meta, :alternation, '|', 5, 6],
8
+ 3 => [:literal, :literal, 'def', 6, 9],
9
+ 4 => [:quantifier, :zero_or_more_possessive, '*+', 9, 11],
10
+ 5 => [:meta, :alternation, '|', 11, 12]
11
+
12
+ include_examples 'scan', /(a\|b)|(c|d)\|(e[|]f)/,
13
+ 2 => [:escape, :alternation, '\|', 2, 4],
14
+ 5 => [:meta, :alternation, '|', 6, 7],
15
+ 8 => [:meta, :alternation, '|', 9, 10],
16
+ 11 => [:escape, :alternation, '\|', 12, 14],
17
+ 15 => [:literal, :literal, '|', 17, 18]
18
+ end
@@ -0,0 +1,64 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Property scanning') do
4
+ RSpec.shared_examples 'scan property' do |text, token|
5
+ it("scans \\p{#{text}} as property #{token}") do
6
+ result = RS.scan("\\p{#{text}}")[0]
7
+ expect(result[0..1]).to eq [:property, token]
8
+ end
9
+
10
+ it("scans \\P{#{text}} as nonproperty #{token}") do
11
+ result = RS.scan("\\P{#{text}}")[0]
12
+ expect(result[0..1]).to eq [:nonproperty, token]
13
+ end
14
+
15
+ it("scans \\p{^#{text}} as nonproperty #{token}") do
16
+ result = RS.scan("\\p{^#{text}}")[0]
17
+ expect(result[0..1]).to eq [:nonproperty, token]
18
+ end
19
+
20
+ it("scans double-negated \\P{^#{text}} as property #{token}") do
21
+ result = RS.scan("\\P{^#{text}}")[0]
22
+ expect(result[0..1]).to eq [:property, token]
23
+ end
24
+ end
25
+
26
+ include_examples 'scan property', 'Alnum', :alnum
27
+
28
+ include_examples 'scan property', 'XPosixPunct', :xposixpunct
29
+
30
+ include_examples 'scan property', 'Newline', :newline
31
+
32
+ include_examples 'scan property', 'Any', :any
33
+
34
+ include_examples 'scan property', 'Assigned', :assigned
35
+
36
+ include_examples 'scan property', 'Age=1.1', :'age=1.1'
37
+ include_examples 'scan property', 'Age=10.0', :'age=10.0'
38
+
39
+ include_examples 'scan property', 'ahex', :ascii_hex_digit
40
+ include_examples 'scan property', 'ASCII_Hex_Digit', :ascii_hex_digit # test underscore
41
+
42
+ include_examples 'scan property', 'sd', :soft_dotted
43
+ include_examples 'scan property', 'Soft-Dotted', :soft_dotted # test dash
44
+
45
+ include_examples 'scan property', 'Egyp', :egyptian_hieroglyphs
46
+ include_examples 'scan property', 'Egyptian Hieroglyphs', :egyptian_hieroglyphs # test whitespace
47
+
48
+ include_examples 'scan property', 'Linb', :linear_b
49
+ include_examples 'scan property', 'Linear-B', :linear_b # test dash
50
+
51
+ include_examples 'scan property', 'InArabic', :in_arabic # test block
52
+ include_examples 'scan property', 'in Arabic', :in_arabic # test block w. whitespace
53
+ include_examples 'scan property', 'In_Arabic', :in_arabic # test block w. underscore
54
+
55
+ include_examples 'scan property', 'Yiii', :yi
56
+ include_examples 'scan property', 'Yi', :yi
57
+
58
+ include_examples 'scan property', 'Zinh', :inherited
59
+ include_examples 'scan property', 'Inherited', :inherited
60
+ include_examples 'scan property', 'Qaai', :inherited
61
+
62
+ include_examples 'scan property', 'Zzzz', :unknown
63
+ include_examples 'scan property', 'Unknown', :unknown
64
+ end